diff --git build-common.xml build-common.xml index 581ded1..e5d5b2f 100644 --- build-common.xml +++ build-common.xml @@ -96,7 +96,7 @@ + excludes="**/hive_*.jar,**/hive-*.jar"/> @@ -304,6 +304,9 @@ + + + diff --git build.properties build.properties index 8b98c90..682b5e9 100644 --- build.properties +++ build.properties @@ -79,7 +79,7 @@ common.jar=${hadoop.root}/lib/commons-httpclient-3.0.1.jar # (measured in milliseconds). Ignored if fork is disabled. When running # multiple tests inside the same Java VM (see forkMode), timeout # applies to the time that all tests use together, not to an individual test. -test.junit.timeout=36400000 +test.junit.timeout=43200000 # Use this property to selectively disable tests from the command line: # ant test -Dtest.junit.exclude="**/TestCliDriver.class" diff --git common/ivy.xml common/ivy.xml index bba0e13..5ebee66 100644 --- common/ivy.xml +++ common/ivy.xml @@ -59,7 +59,7 @@ conf="hadoop23.compile->default" transitive="false" /> @@ -67,14 +67,14 @@ diff --git hbase-handler/ivy.xml hbase-handler/ivy.xml index ed02fc2..8d48361 100644 --- hbase-handler/ivy.xml +++ hbase-handler/ivy.xml @@ -47,6 +47,35 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git ivy/libraries.properties ivy/libraries.properties index 7ac6778..6bdfd10 100644 --- ivy/libraries.properties +++ ivy/libraries.properties @@ -42,7 +42,8 @@ commons-logging.version=1.0.4 commons-logging-api.version=1.0.4 commons-pool.version=1.5.4 derby.version=10.4.2.0 -guava.version=11.0.2 +guava-hadoop20.version=r09 +guava-hadoop23.version=11.0.2 hbase.version=0.92.0 jackson.version=1.8.8 javaewah.version=0.3.2 diff --git ql/ivy.xml ql/ivy.xml index aa3b8ce..a220d7f 100644 --- ql/ivy.xml +++ ql/ivy.xml @@ -39,6 +39,12 @@ + + + + - @@ -82,6 +86,27 @@ + + + + + + + + + + + + + + + diff --git ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java index f190aaf..4e610c4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java +++ ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java @@ -45,6 +45,7 @@ import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileStatus; @@ -219,8 +220,25 @@ public class QTestUtil { conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, (new Path(dfsUriString, "/build/ql/test/data/warehouse/")).toString()); + int port = 0; + + try { + // Hadoop20 MiniMRCluster will return a proper port. + // Hadoop23 MiniMRCluster does not implement this method so use the default RM port. 
+        port = mr.getJobTrackerPort();
+      } catch (UnsupportedOperationException e) {
+        String address =
+            StringUtils.substringAfterLast(conf.get("yarn.resourcemanager.address"), ":");
+
+        if (StringUtils.isBlank(address)) {
+          throw new IllegalArgumentException("Invalid YARN resource manager port.");
+        }
+
+        port = Integer.parseInt(address);
+      }
+
       ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf,
-          "localhost:" + mr.getJobTrackerPort());
+          "localhost:" + port);
     }
   }
@@ -938,12 +956,15 @@
         ".*USING 'java -cp.*",
         "^Deleted.*",
     };
-    maskPatterns(patterns, (new File(logDir, tname + ".out")).getPath());
-    int exitVal = executeDiffCommand((new File(logDir, tname + ".out")).getPath(),
+
+    File f = new File(logDir, tname + ".out");
+
+    maskPatterns(patterns, f.getPath());
+    int exitVal = executeDiffCommand(f.getPath(),
                                      outFileName, false);
 
     if (exitVal != 0 && overWrite) {
-      exitVal = overwriteResults((new File(logDir, tname + ".out")).getPath(), outFileName);
+      exitVal = overwriteResults(f.getPath(), outFileName);
     }
 
     return exitVal;
diff --git ql/src/test/queries/clientpositive/auto_join14.q ql/src/test/queries/clientpositive/auto_join14.q
index 9552c86..cbdc5e2 100644
--- ql/src/test/queries/clientpositive/auto_join14.q
+++ ql/src/test/queries/clientpositive/auto_join14.q
@@ -1,9 +1,12 @@
 
 set hive.auto.convert.join = true;
 
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)
+
 CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE;
 
-set mapred.job.tracker=does.notexist.com:666;
+set mapreduce.framework.name=yarn;
+set mapreduce.jobtracker.address=localhost:58;
 set hive.exec.mode.local.auto=true;
 
 explain
diff --git ql/src/test/queries/clientpositive/auto_join14_hadoop20.q ql/src/test/queries/clientpositive/auto_join14_hadoop20.q
new file mode 100644
index 0000000..e7ef6cf
--- /dev/null
+++ ql/src/test/queries/clientpositive/auto_join14_hadoop20.q
@@ -0,0 +1,18 @@
+
+set hive.auto.convert.join = true;
+
+-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20)
+
+CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE;
+
+set mapred.job.tracker=localhost:58;
+set hive.exec.mode.local.auto=true;
+
+explain
+FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100
+INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value;
+
+FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100
+INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value;
+
+SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1;
diff --git ql/src/test/queries/clientpositive/combine2.q ql/src/test/queries/clientpositive/combine2.q
index 0e5dba9..be94783 100644
--- ql/src/test/queries/clientpositive/combine2.q
+++ ql/src/test/queries/clientpositive/combine2.q
@@ -14,7 +14,7 @@ set hive.merge.smallfiles.avgsize=0;
 
 create table combine2(key string) partitioned by (value string);
 
--- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22)
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)
 -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0
 -- in an attempt to force the generation of multiple splits and multiple output files.
-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size diff --git ql/src/test/queries/clientpositive/combine2_hadoop20.q ql/src/test/queries/clientpositive/combine2_hadoop20.q new file mode 100644 index 0000000..2cc05ea --- /dev/null +++ ql/src/test/queries/clientpositive/combine2_hadoop20.q @@ -0,0 +1,48 @@ +USE default; + +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +set mapred.min.split.size=256; +set mapred.min.split.size.per.node=256; +set mapred.min.split.size.per.rack=256; +set mapred.max.split.size=256; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set mapred.cache.shared.enabled=false; +set hive.merge.smallfiles.avgsize=0; + + + +create table combine2(key string) partitioned by (value string); + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) +select * from ( + select key, value from src where key < 10 + union all + select key, '|' as value from src where key = 11 + union all + select key, '2010-04-21 09:45:00' value from src where key = 19) s; + +show partitions combine2; + +explain +select key, value from combine2 where value is not null order by key; + +select key, value from combine2 where value is not null order by key; + +explain extended +select count(1) from combine2 where value is not null; + +select count(1) from combine2 where value is not null; + +explain +select ds, count(1) from srcpart where ds is not null group by ds; + +select ds, count(1) from srcpart where ds is not null group by ds; diff --git ql/src/test/queries/clientpositive/ctas.q ql/src/test/queries/clientpositive/ctas.q index d44e3c0..db91e2d 100644 --- ql/src/test/queries/clientpositive/ctas.q +++ ql/src/test/queries/clientpositive/ctas.q @@ -1,10 +1,4 @@ - - - - - - - +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) create table nzhang_Tmp(a int, b string); select * from nzhang_Tmp; @@ -55,7 +49,8 @@ describe formatted nzhang_CTAS4; explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10; -set mapred.job.tracker=does.notexist.com:666; +set mapreduce.framework.name=yarn; +set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10; diff --git ql/src/test/queries/clientpositive/ctas_hadoop20.q ql/src/test/queries/clientpositive/ctas_hadoop20.q new file mode 100644 index 0000000..4927dea --- /dev/null +++ ql/src/test/queries/clientpositive/ctas_hadoop20.q @@ -0,0 +1,68 @@ +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string); +select * from nzhang_Tmp; + +explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10; + +create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10; + +select * from 
nzhang_CTAS1; + +describe formatted nzhang_CTAS1; + + +explain create table nzhang_ctas2 as select * from src sort by key, value limit 10; + +create table nzhang_ctas2 as select * from src sort by key, value limit 10; + +select * from nzhang_ctas2; + +describe formatted nzhang_CTAS2; + + +explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10; + +create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10; + +select * from nzhang_ctas3; + +describe formatted nzhang_CTAS3; + + +explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2; + +create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2; + +select * from nzhang_ctas3; + +describe formatted nzhang_CTAS3; + + +explain create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10; + +create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10; + +select * from nzhang_ctas4; + +describe formatted nzhang_CTAS4; + +explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10; + +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto=true; + +create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10; + +create table nzhang_ctas6 (key string, `to` string); +insert overwrite table nzhang_ctas6 select key, value from src limit 10; +create table nzhang_ctas7 as select key, `to` from nzhang_ctas6; + + + + + + + + + diff --git ql/src/test/queries/clientpositive/input12.q ql/src/test/queries/clientpositive/input12.q index 083a445..72817d5 100644 --- ql/src/test/queries/clientpositive/input12.q +++ ql/src/test/queries/clientpositive/input12.q @@ -1,6 +1,9 @@ -set mapred.job.tracker=does.notexist.com:666; +set mapreduce.framework.name=yarn; +set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/input12_hadoop20.q ql/src/test/queries/clientpositive/input12_hadoop20.q new file mode 100644 index 0000000..86751af --- /dev/null +++ ql/src/test/queries/clientpositive/input12_hadoop20.q @@ -0,0 +1,23 @@ +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto=true; + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT 
OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200; + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200; + +SELECT dest1.* FROM dest1; +SELECT dest2.* FROM dest2; +SELECT dest3.* FROM dest3; diff --git ql/src/test/queries/clientpositive/input39.q ql/src/test/queries/clientpositive/input39.q index 7d457e2..f77a670 100644 --- ql/src/test/queries/clientpositive/input39.q +++ ql/src/test/queries/clientpositive/input39.q @@ -1,4 +1,4 @@ - +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) create table t1(key string, value string) partitioned by (ds string); @@ -15,7 +15,8 @@ select key, value from src; set hive.test.mode=true; set hive.mapred.mode=strict; -set mapred.job.tracker=does.notexist.com:666; +set mapreduce.framework.name=yarn; +set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; explain @@ -24,7 +25,8 @@ select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1'; select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1'; set hive.test.mode=false; -set mapred.job.tracker; +set mapreduce.framework.name; +set mapreduce.jobtracker.address; diff --git ql/src/test/queries/clientpositive/input39_hadoop20.q ql/src/test/queries/clientpositive/input39_hadoop20.q new file mode 100644 index 0000000..3c5f2c5 --- /dev/null +++ ql/src/test/queries/clientpositive/input39_hadoop20.q @@ -0,0 +1,30 @@ +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string); +create table t2(key string, value string) partitioned by (ds string); + +insert overwrite table t1 partition (ds='1') +select key, value from src; + +insert overwrite table t1 partition (ds='2') +select key, value from src; + +insert overwrite table t2 partition (ds='1') +select key, value from src; + +set hive.test.mode=true; +set hive.mapred.mode=strict; +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto=true; + +explain +select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1'; + +select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1'; + +set hive.test.mode=false; +set mapred.job.tracker; + + + diff --git ql/src/test/queries/clientpositive/join14.q ql/src/test/queries/clientpositive/join14.q index b4e3d81..b57d326 100644 --- ql/src/test/queries/clientpositive/join14.q +++ ql/src/test/queries/clientpositive/join14.q @@ -1,7 +1,9 @@ CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; -set mapred.job.tracker=does.notexist.com:666; +set mapreduce.framework.name=yarn; +set mapred.job.tracker=localhost:58; set hive.exec.mode.local.auto=true; +set hive.exec.mode.local.auto.input.files.max=6; EXPLAIN FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 diff --git ql/src/test/queries/clientpositive/sample_islocalmode_hook.q ql/src/test/queries/clientpositive/sample_islocalmode_hook.q index 9610fc3..c2e36ff 100644 --- ql/src/test/queries/clientpositive/sample_islocalmode_hook.q +++ ql/src/test/queries/clientpositive/sample_islocalmode_hook.q @@ -1,5 +1,3 @@ -USE default; - set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; set mapred.max.split.size=300; set mapred.min.split.size=300; @@ -8,6 +6,8 @@ set mapred.min.split.size.per.rack=300; set 
hive.exec.mode.local.auto=true; set hive.merge.smallfiles.avgsize=1; +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + -- create file inputs create table sih_i_part (key int, value string) partitioned by (p string); insert overwrite table sih_i_part partition (p='1') select key, value from src; @@ -16,28 +16,18 @@ insert overwrite table sih_i_part partition (p='3') select key+20000, value from create table sih_src as select key, value from sih_i_part order by key, value; create table sih_src2 as select key, value from sih_src order by key, value; -set hive.exec.post.hooks = org.apache.hadoop.hive.ql.hooks.VerifyIsLocalModeHook ; -set mapred.job.tracker=does.notexist.com:666; -set hive.exec.mode.local.auto.input.files.max=1; +set hive.exec.post.hooks = org.apache.hadoop.hive.ql.hooks.VerifyIsLocalModeHook; +set mapreduce.framework.name=yarn; +set mapreduce.jobtracker.address=localhost:58; +set hive.sample.seednumber=7; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. --- Sample split, running locally limited by num tasks +-- sample split, running locally limited by num tasks select count(1) from sih_src tablesample(1 percent); -set mapred.job.tracker=does.notexist.com:666; - -- sample two tables -select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key; +select count(1) from sih_src tablesample(1 percent) a join sih_src2 tablesample(1 percent) b on a.key = b.key; set hive.exec.mode.local.auto.inputbytes.max=1000; -set hive.exec.mode.local.auto.input.files.max=4; -set mapred.job.tracker=does.notexist.com:666; -- sample split, running locally limited by max bytes select count(1) from sih_src tablesample(1 percent); diff --git ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q new file mode 100644 index 0000000..ce8bc01 --- /dev/null +++ ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q @@ -0,0 +1,41 @@ +USE default; + +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +set mapred.max.split.size=300; +set mapred.min.split.size=300; +set mapred.min.split.size.per.node=300; +set mapred.min.split.size.per.rack=300; +set hive.exec.mode.local.auto=true; +set hive.merge.smallfiles.avgsize=1; + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. 
+ +-- create file inputs +create table sih_i_part (key int, value string) partitioned by (p string); +insert overwrite table sih_i_part partition (p='1') select key, value from src; +insert overwrite table sih_i_part partition (p='2') select key+10000, value from src; +insert overwrite table sih_i_part partition (p='3') select key+20000, value from src; +create table sih_src as select key, value from sih_i_part order by key, value; +create table sih_src2 as select key, value from sih_src order by key, value; + +set hive.exec.post.hooks = org.apache.hadoop.hive.ql.hooks.VerifyIsLocalModeHook ; +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto.input.files.max=1; + +-- Sample split, running locally limited by num tasks +select count(1) from sih_src tablesample(1 percent); + +-- sample two tables +select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key; + +set hive.exec.mode.local.auto.inputbytes.max=1000; +set hive.exec.mode.local.auto.input.files.max=4; + +-- sample split, running locally limited by max bytes +select count(1) from sih_src tablesample(1 percent); diff --git ql/src/test/queries/clientpositive/split_sample.q ql/src/test/queries/clientpositive/split_sample.q index e94bb9c..e772f51 100644 --- ql/src/test/queries/clientpositive/split_sample.q +++ ql/src/test/queries/clientpositive/split_sample.q @@ -1,20 +1,12 @@ -USE default; - set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; set mapred.max.split.size=300; set mapred.min.split.size=300; set mapred.min.split.size.per.node=300; set mapred.min.split.size.per.rack=300; set hive.merge.smallfiles.avgsize=1; +set hive.sample.seednumber=7; --- create multiple file inputs (two enable multiple splits) -create table ss_i_part (key int, value string) partitioned by (p string); -insert overwrite table ss_i_part partition (p='1') select key, value from src; -insert overwrite table ss_i_part partition (p='2') select key, value from src; -insert overwrite table ss_i_part partition (p='3') select key, value from src; -create table ss_src2 as select key, value from ss_i_part; - --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) -- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size @@ -22,11 +14,17 @@ create table ss_src2 as select key, value from ss_i_part; -- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was -- fixed in MAPREDUCE-2046 which is included in 0.22. 
+-- create multiple file inputs (two enable multiple splits) +create table ss_i_part (key int, value string) partitioned by (p string); +insert overwrite table ss_i_part partition (p='1') select key, value from src; +insert overwrite table ss_i_part partition (p='2') select key, value from src; +insert overwrite table ss_i_part partition (p='3') select key, value from src; +create table ss_src2 as select key, value from ss_i_part; select count(1) from ss_src2 tablesample(1 percent); +set hive.sample.seednumber=0; -- sample first split desc ss_src2; -set hive.sample.seednumber=0; explain select key, value from ss_src2 tablesample(1 percent) limit 10; select key, value from ss_src2 tablesample(1 percent) limit 10; @@ -43,6 +41,7 @@ set hive.sample.seednumber=5; create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t; +set hive.sample.seednumber=7; -- sample more than one split explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; diff --git ql/src/test/queries/clientpositive/split_sample_hadoop20.q ql/src/test/queries/clientpositive/split_sample_hadoop20.q new file mode 100644 index 0000000..d7b7498 --- /dev/null +++ ql/src/test/queries/clientpositive/split_sample_hadoop20.q @@ -0,0 +1,80 @@ +USE default; + +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +set mapred.max.split.size=300; +set mapred.min.split.size=300; +set mapred.min.split.size.per.node=300; +set mapred.min.split.size.per.rack=300; +set hive.merge.smallfiles.avgsize=1; + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. 
+ +-- create multiple file inputs (two enable multiple splits) +create table ss_i_part (key int, value string) partitioned by (p string); +insert overwrite table ss_i_part partition (p='1') select key, value from src; +insert overwrite table ss_i_part partition (p='2') select key, value from src; +insert overwrite table ss_i_part partition (p='3') select key, value from src; +create table ss_src2 as select key, value from ss_i_part; + +select count(1) from ss_src2 tablesample(1 percent); + +-- sample first split +desc ss_src2; +set hive.sample.seednumber=0; +explain select key, value from ss_src2 tablesample(1 percent) limit 10; +select key, value from ss_src2 tablesample(1 percent) limit 10; + +-- verify seed number of sampling +insert overwrite table ss_i_part partition (p='1') select key+10000, value from src; +insert overwrite table ss_i_part partition (p='2') select key+20000, value from src; +insert overwrite table ss_i_part partition (p='3') select key+30000, value from src; +create table ss_src3 as select key, value from ss_i_part; +set hive.sample.seednumber=3; +create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; +set hive.sample.seednumber=4; +create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; +set hive.sample.seednumber=5; +create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; +select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t; + +-- sample more than one split +explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; +select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; + +-- sample all splits +select count(1) from ss_src2 tablesample(100 percent); + +-- subquery +explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq; +select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq; + +-- groupby +select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key; + +-- sample one of two tables: +create table ss_src1 as select * from ss_src2; +select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k; + +-- sample two tables +explain select * from ( +select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key +) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199); + +select * from ( +select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key +) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199); + +-- shrink last split +explain select count(1) from ss_src2 tablesample(1 percent); +set mapred.max.split.size=300000; +set mapred.min.split.size=300000; +set mapred.min.split.size.per.node=300000; +set mapred.min.split.size.per.rack=300000; +select count(1) from ss_src2 tablesample(1 percent); +select count(1) from ss_src2 tablesample(50 percent); diff --git ql/src/test/resources/core-site.xml ql/src/test/resources/core-site.xml new file mode 100644 index 0000000..dde8ff9 --- /dev/null +++ ql/src/test/resources/core-site.xml @@ -0,0 +1,27 @@ + + + + + + + + + yarn.app.mapreduce.am.job.node-blacklisting.enable + false + + diff --git ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out index 
b199241..e1cd0c5 100644
--- ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out
+++ ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out
@@ -2,12 +2,4 @@ PREHOOK: query: FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exi
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"}
-Hive Runtime Error while processing row {"key":"238","value":"val_238"}
-FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"}
-Hive Runtime Error while processing row {"key":"238","value":"val_238"}
-FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"}
-Hive Runtime Error while processing row {"key":"238","value":"val_238"}
-FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"}
-Hive Runtime Error while processing row {"key":"238","value":"val_238"}
-FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask
+FAILED: Execution Error, return code 20000 from org.apache.hadoop.hive.ql.exec.MapRedTask. Unable to initialize custom script.
diff --git ql/src/test/results/clientnegative/mapreduce_stack_trace_turnoff.q.out ql/src/test/results/clientnegative/mapreduce_stack_trace_turnoff.q.out
index da9203d..e1cd0c5 100644
--- ql/src/test/results/clientnegative/mapreduce_stack_trace_turnoff.q.out
+++ ql/src/test/results/clientnegative/mapreduce_stack_trace_turnoff.q.out
@@ -2,4 +2,4 @@ PREHOOK: query: FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exi
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask
+FAILED: Execution Error, return code 20000 from org.apache.hadoop.hive.ql.exec.MapRedTask. Unable to initialize custom script.
diff --git ql/src/test/results/clientpositive/auto_join14.q.out ql/src/test/results/clientpositive/auto_join14.q.out index e5b3b61..d0531c4 100644 --- ql/src/test/results/clientpositive/auto_join14.q.out +++ ql/src/test/results/clientpositive/auto_join14.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: explain diff --git ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out new file mode 100644 index 0000000..dc30130 --- /dev/null +++ ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out @@ -0,0 +1,290 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME srcpart)) (and (AND (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL srcpart) key)) (= (. (TOK_TABLE_OR_COL srcpart) ds) '2008-04-08')) (> (. (TOK_TABLE_OR_COL src) key) 100)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL srcpart) value))))) + +STAGE DEPENDENCIES: + Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 + Stage-7 has a backup stage: Stage-1 + Stage-4 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-2 depends on stages: Stage-0 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-1 + +STAGE PLANS: + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + srcpart + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + srcpart + TableScan + alias: srcpart + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + srcpart + TableScan + alias: srcpart + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce 
Local Work + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + srcpart + TableScan + alias: srcpart + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + +PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +404554174174 diff --git ql/src/test/results/clientpositive/combine2.q.out ql/src/test/results/clientpositive/combine2.q.out index 73d4bc5..9ec72b9 100644 --- ql/src/test/results/clientpositive/combine2.q.out +++ ql/src/test/results/clientpositive/combine2.q.out @@ -7,7 +7,7 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table combine2(key string) partitioned by (value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@combine2 -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in 
an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size @@ -25,7 +25,7 @@ select * from ( PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@combine2 -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) -- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 -- in an attempt to force the generation of multiple splits and multiple output files. -- However, Hadoop 0.20 is incapable of generating splits smaller than the block size @@ -259,7 +259,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -285,7 +285,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 1 + numFiles 3 numPartitions 8 numRows 3 partition_columns value @@ -305,7 +305,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -351,7 +351,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -397,7 +397,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -423,7 +423,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 1 + numFiles 3 numPartitions 8 numRows 3 partition_columns value @@ -443,7 +443,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -489,7 +489,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -535,7 +535,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -581,7 +581,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value diff --git ql/src/test/results/clientpositive/combine2_hadoop20.q.out ql/src/test/results/clientpositive/combine2_hadoop20.q.out new file mode 100644 index 0000000..c73f2c5 --- /dev/null +++ ql/src/test/results/clientpositive/combine2_hadoop20.q.out @@ -0,0 +1,769 @@ +PREHOOK: query: USE default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE +PREHOOK: query: create table combine2(key string) partitioned by (value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table combine2(key string) partitioned by (value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@combine2 +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. 
+-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) +select * from ( + select key, value from src where key < 10 + union all + select key, '|' as value from src where key = 11 + union all + select key, '2010-04-21 09:45:00' value from src where key = 19) s +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@combine2 +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) +select * from ( + select key, value from src where key < 10 + union all + select key, '|' as value from src where key = 11 + union all + select key, '2010-04-21 09:45:00' value from src where key = 19) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@combine2@value=2010-04-21 09%3A45%3A00 +POSTHOOK: Output: default@combine2@value=val_0 +POSTHOOK: Output: default@combine2@value=val_2 +POSTHOOK: Output: default@combine2@value=val_4 +POSTHOOK: Output: default@combine2@value=val_5 +POSTHOOK: Output: default@combine2@value=val_8 +POSTHOOK: Output: default@combine2@value=val_9 +POSTHOOK: Output: default@combine2@value=| +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), 
(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: show partitions combine2 +PREHOOK: type: SHOWPARTITIONS +POSTHOOK: query: show partitions combine2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +value=2010-04-21 09%3A45%3A00 +value=val_0 +value=val_2 +value=val_4 +value=val_5 +value=val_8 +value=val_9 +value=| +PREHOOK: query: explain +select key, value from combine2 where value is not null order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value from combine2 where value is not null order by key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME combine2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + combine2 + TableScan + alias: combine2 + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select key, value from combine2 where value is not null order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 +PREHOOK: Input: default@combine2@value=val_0 +PREHOOK: Input: default@combine2@value=val_2 +PREHOOK: Input: default@combine2@value=val_4 +PREHOOK: Input: default@combine2@value=val_5 +PREHOOK: Input: default@combine2@value=val_8 +PREHOOK: Input: default@combine2@value=val_9 +PREHOOK: Input: default@combine2@value=| +#### A masked pattern was here #### +POSTHOOK: query: select key, value from combine2 where value is not null order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 +POSTHOOK: Input: default@combine2@value=val_0 +POSTHOOK: Input: default@combine2@value=val_2 +POSTHOOK: Input: default@combine2@value=val_4 +POSTHOOK: Input: default@combine2@value=val_5 +POSTHOOK: Input: default@combine2@value=val_8 +POSTHOOK: 
Input: default@combine2@value=val_9 +POSTHOOK: Input: default@combine2@value=| +#### A masked pattern was here #### +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +11 | +19 2010-04-21 09:45:00 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain extended +select count(1) from combine2 where value is not null +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select count(1) from combine2 where value is not null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), 
] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME combine2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + combine2 + TableScan + alias: combine2 + GatherStats: false + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: value=2010-04-21 09%3A45%3A00 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value 2010-04-21 09:45:00 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 2 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_0 + properties: + bucket_count -1 + columns key + columns.types string +#### A 
masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 3 + partition_columns value + rawDataSize 3 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 6 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_2 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 1 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_4 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 1 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_5 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_5 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 3 + partition_columns value + rawDataSize 3 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 6 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_8 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_8 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 1 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_9 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_9 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 1 + serialization.ddl struct combine2 { string key} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=| + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value | + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 2 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(1) from combine2 where value is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 +PREHOOK: Input: default@combine2@value=val_0 +PREHOOK: Input: default@combine2@value=val_2 +PREHOOK: Input: default@combine2@value=val_4 +PREHOOK: Input: default@combine2@value=val_5 +PREHOOK: Input: default@combine2@value=val_8 +PREHOOK: Input: default@combine2@value=val_9 +PREHOOK: Input: default@combine2@value=| +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from combine2 where value is not null 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 +POSTHOOK: Input: default@combine2@value=val_0 +POSTHOOK: Input: default@combine2@value=val_2 +POSTHOOK: Input: default@combine2@value=val_4 +POSTHOOK: Input: default@combine2@value=val_5 +POSTHOOK: Input: default@combine2@value=val_8 +POSTHOOK: Input: default@combine2@value=val_9 +POSTHOOK: Input: default@combine2@value=| +#### A masked pattern was here #### +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +12 +PREHOOK: query: explain +select ds, count(1) from srcpart where ds is not null group by ds +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ds, count(1) from srcpart where ds is not null group by ds +POSTHOOK: type: QUERY +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 
PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + srcpart + TableScan + alias: srcpart + Select Operator + expressions: + expr: ds + type: string + outputColumnNames: ds + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: ds + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select ds, count(1) from srcpart where ds is not null group by ds +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, count(1) from srcpart where ds is not null group by ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: Lineage: combine2 
PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +2008-04-08 1000 +2008-04-09 1000 diff --git ql/src/test/results/clientpositive/ctas.q.out ql/src/test/results/clientpositive/ctas.q.out index def351f..05457ab 100644 --- ql/src/test/results/clientpositive/ctas.q.out +++ ql/src/test/results/clientpositive/ctas.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table nzhang_Tmp(a int, b string) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table nzhang_Tmp(a int, b string) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@nzhang_Tmp PREHOOK: query: select * from nzhang_Tmp diff --git ql/src/test/results/clientpositive/ctas_hadoop20.q.out ql/src/test/results/clientpositive/ctas_hadoop20.q.out new file mode 100644 index 0000000..5e09435 --- /dev/null +++ ql/src/test/results/clientpositive/ctas_hadoop20.q.out @@ -0,0 +1,948 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@nzhang_Tmp +PREHOOK: query: select * from nzhang_Tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_tmp +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_Tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@nzhang_tmp +#### A masked pattern was here #### +PREHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_CTAS1) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + name: default.nzhang_CTAS1 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: k string, value string + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: nzhang_CTAS1 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_CTAS1 +PREHOOK: query: select * from nzhang_CTAS1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas1 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_CTAS1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: describe formatted nzhang_CTAS1 +PREHOOK: type: DESCTABLE +POSTHOOK: query: 
describe formatted nzhang_CTAS1 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +k string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 96 + totalSize 106 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table nzhang_ctas2 as select * from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas2 as select * from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas2) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + name: default.nzhang_ctas2 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: nzhang_ctas2 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_ctas2 as select 
* from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas2 +PREHOOK: query: select * from nzhang_ctas2 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas2 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: describe formatted nzhang_CTAS2 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted nzhang_CTAS2 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 96 + totalSize 106 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas3) TOK_LIKETABLE (TOK_TABLESERIALIZER (TOK_SERDENAME "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) TOK_TBLRCFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) 2) half_key) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_TABLE_OR_COL value) "_con") conb)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL half_key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL conb))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: (key / 2) + type: double + expr: concat(value, '_con') + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + 
Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + name: default.nzhang_ctas3 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: half_key double, conb string + if not exists: false + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde name: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: nzhang_ctas3 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas3 +PREHOOK: query: select * from nzhang_ctas3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +0.0 val_0_con +0.0 val_0_con +0.0 val_0_con +1.0 val_2_con +2.0 val_4_con +2.5 val_5_con +2.5 val_5_con +2.5 val_5_con +4.0 val_8_con +4.5 val_9_con +PREHOOK: query: describe formatted nzhang_CTAS3 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted nzhang_CTAS3 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +half_key double None +conb string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 120 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +POSTHOOK: type: CREATETABLE +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas3) TOK_IFNOTEXISTS TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY 
(TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 2)))) + +STAGE DEPENDENCIES: + +STAGE PLANS: +STAGE PLANS: +PREHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +POSTHOOK: type: CREATETABLE +PREHOOK: query: select * from nzhang_ctas3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +0.0 val_0_con +0.0 val_0_con +0.0 val_0_con +1.0 val_2_con +2.0 val_4_con +2.5 val_5_con +2.5 val_5_con +2.5 val_5_con +4.0 val_8_con +4.5 val_9_con +PREHOOK: query: describe formatted nzhang_CTAS3 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted nzhang_CTAS3 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +half_key double None +conb string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 120 + totalSize 199 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas4) TOK_LIKETABLE (TOK_TABLEROWFORMAT (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD ','))) TOK_TBLTEXTFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + name: default.nzhang_ctas4 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + field delimiter: , + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: nzhang_ctas4 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas4 +PREHOOK: query: select * from nzhang_ctas4 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas4 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas4 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: describe formatted nzhang_CTAS4 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted nzhang_CTAS4 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 96 + totalSize 106 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + field.delim , + serialization.format , +PREHOOK: query: explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas5) TOK_LIKETABLE (TOK_TABLEROWFORMAT (TOK_SERDEPROPS 
(TOK_TABLEROWFORMATFIELD ',') (TOK_TABLEROWFORMATLINES '\012'))) TOK_TBLTEXTFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,string + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
properties: + columns _col0,_col1 + columns.types string,string + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,string + escape.delim \ + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + field.delim , + line.delim + + name default.nzhang_ctas5 + serialization.format , + name: default.nzhang_ctas5 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + field delimiter: , + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + line delimiter: + + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: nzhang_ctas5 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + + +PREHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas5 +PREHOOK: query: create table nzhang_ctas6 (key string, `to` string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table nzhang_ctas6 (key string, `to` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@nzhang_ctas6 +PREHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_ctas6 +POSTHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas6 +POSTHOOK: Lineage: nzhang_ctas6.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_ctas6.to SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table nzhang_ctas7 as select key, `to` from nzhang_ctas6 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@nzhang_ctas6 +POSTHOOK: query: create table nzhang_ctas7 as select key, `to` from nzhang_ctas6 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@nzhang_ctas6 +POSTHOOK: Output: default@nzhang_ctas7 +POSTHOOK: Lineage: nzhang_ctas6.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_ctas6.to SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/input12.q.out ql/src/test/results/clientpositive/input12.q.out index 22164e2..e4bdf24 100644 --- 
ql/src/test/results/clientpositive/input12.q.out +++ ql/src/test/results/clientpositive/input12.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE diff --git ql/src/test/results/clientpositive/input12_hadoop20.q.out ql/src/test/results/clientpositive/input12_hadoop20.q.out new file mode 100644 index 0000000..b071f90 --- /dev/null +++ ql/src/test/results/clientpositive/input12_hadoop20.q.out @@ -0,0 +1,868 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest2 +PREHOOK: query: CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest3 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL src) key) 100) (< (. (TOK_TABLE_OR_COL src) key) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest3) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '12')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))) (TOK_WHERE (>= (. 
(TOK_TABLE_OR_COL src) key) 200)))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 + Stage-6 + Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 + Stage-4 depends on stages: Stage-0 + Stage-5 + Stage-7 + Stage-8 depends on stages: Stage-7 + Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 + Stage-12 + Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 + Stage-10 depends on stages: Stage-1 + Stage-11 + Stage-13 + Stage-14 depends on stages: Stage-13 + Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 + Stage-18 + Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 + Stage-16 depends on stages: Stage-2 + Stage-17 + Stage-19 + Stage-20 depends on stages: Stage-19 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 100.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Filter Operator + predicate: + expr: ((key >= 100.0) and (key < 200.0)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Filter Operator + predicate: + expr: (key >= 200.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 + + Stage: Stage-9 + Conditional Operator + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
name: default.dest1 + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-8 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-15 + Conditional Operator + + Stage: Stage-12 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-10 + Stats-Aggr Operator + + Stage: Stage-11 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-13 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-14 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-21 + Conditional Operator + + Stage: Stage-18 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-2 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 12 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 + + Stage: Stage-16 + Stats-Aggr Operator + + Stage: Stage-17 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 + + Stage: Stage-19 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 + + Stage: Stage-20 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +PREHOOK: Output: default@dest3@ds=2008-04-08/hr=12 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Output: default@dest3@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +86 val_86 +27 val_27 +98 val_98 +66 val_66 +37 val_37 +15 val_15 +82 val_82 +17 val_17 +0 val_0 +57 val_57 +20 val_20 +92 val_92 +47 val_47 +72 val_72 +4 val_4 +35 val_35 +54 val_54 +51 val_51 +65 val_65 +83 val_83 +12 val_12 +67 val_67 +84 val_84 +58 val_58 +8 val_8 +24 val_24 +42 val_42 +0 val_0 +96 val_96 +26 val_26 +51 val_51 +43 val_43 +95 val_95 +98 val_98 +85 val_85 +77 val_77 +0 val_0 +87 val_87 +15 val_15 +72 val_72 +90 val_90 +19 val_19 +10 val_10 +5 val_5 +58 val_58 +35 val_35 +95 val_95 +11 val_11 +34 val_34 +42 val_42 +78 val_78 +76 val_76 +41 val_41 +30 val_30 +64 val_64 +76 val_76 +74 val_74 +69 val_69 +33 val_33 +70 val_70 +5 val_5 +2 val_2 +35 val_35 +80 val_80 +44 val_44 +53 val_53 +90 val_90 +12 val_12 +5 val_5 +70 val_70 +24 val_24 +70 val_70 +83 val_83 +26 val_26 +67 val_67 +18 val_18 +9 val_9 +18 val_18 +97 val_97 +84 val_84 +28 val_28 +37 val_37 +90 val_90 +97 val_97 +PREHOOK: query: SELECT dest2.* FROM dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest2.* FROM dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +165 val_165 +193 val_193 +150 val_150 +128 val_128 +146 val_146 +152 val_152 +145 val_145 +166 val_166 +153 val_153 +193 val_193 +174 val_174 +199 val_199 +174 val_174 +162 val_162 +167 val_167 +195 val_195 +113 val_113 +155 val_155 +128 val_128 +149 val_149 +129 val_129 +170 val_170 +157 val_157 +111 val_111 +169 val_169 +125 val_125 +192 val_192 +187 val_187 +176 val_176 +138 val_138 +103 val_103 +176 val_176 +137 val_137 +180 val_180 +181 val_181 +138 val_138 +179 val_179 +172 val_172 +129 val_129 +158 val_158 +119 val_119 +197 val_197 +100 val_100 +199 val_199 +191 val_191 +165 val_165 +120 val_120 +131 val_131 +156 val_156 +196 val_196 +197 val_197 +187 val_187 +137 val_137 +169 val_169 +179 val_179 +118 val_118 +134 val_134 +138 val_138 +118 val_118 +177 val_177 +168 val_168 +143 val_143 +160 val_160 +195 val_195 +119 val_119 +149 val_149 +138 val_138 +103 val_103 +113 val_113 +167 val_167 +116 val_116 +191 val_191 +128 val_128 +193 val_193 +104 val_104 +175 val_175 +105 val_105 +190 val_190 +114 val_114 +164 val_164 +125 val_125 +164 val_164 +187 val_187 +104 val_104 +163 val_163 +119 val_119 +199 val_199 +120 val_120 +169 val_169 +178 val_178 +136 val_136 +172 val_172 +133 val_133 +175 val_175 +189 val_189 +134 val_134 +100 val_100 +146 val_146 +186 val_186 +167 val_167 +183 val_183 +152 val_152 +194 val_194 +126 val_126 +169 val_169 +PREHOOK: query: SELECT dest3.* FROM dest3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest3@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest3.* FROM dest3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest3@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +238 2008-04-08 12 +311 2008-04-08 12 +409 2008-04-08 12 +255 2008-04-08 12 +278 2008-04-08 12 +484 2008-04-08 12 +265 2008-04-08 12 +401 2008-04-08 12 +273 2008-04-08 12 +224 2008-04-08 12 +369 2008-04-08 12 +213 2008-04-08 12 +406 2008-04-08 12 +429 2008-04-08 12 +374 2008-04-08 12 +469 2008-04-08 12 +495 2008-04-08 12 +327 2008-04-08 12 +281 2008-04-08 12 +277 2008-04-08 12 +209 2008-04-08 12 +403 2008-04-08 12 +417 2008-04-08 12 +430 2008-04-08 12 +252 2008-04-08 12 +292 2008-04-08 12 +219 2008-04-08 12 +287 2008-04-08 12 +338 2008-04-08 12 +446 2008-04-08 12 +459 2008-04-08 12 +394 2008-04-08 12 +237 2008-04-08 12 +482 2008-04-08 12 +413 2008-04-08 12 +494 2008-04-08 12 +207 2008-04-08 12 +466 2008-04-08 12 +208 2008-04-08 12 +399 2008-04-08 12 +396 2008-04-08 12 +247 2008-04-08 12 +417 2008-04-08 12 +489 2008-04-08 12 +377 2008-04-08 12 +397 2008-04-08 12 +309 2008-04-08 12 +365 2008-04-08 12 +266 2008-04-08 12 +439 2008-04-08 12 +342 2008-04-08 12 +367 2008-04-08 12 +325 2008-04-08 12 +475 2008-04-08 12 +203 2008-04-08 12 +339 2008-04-08 12 +455 2008-04-08 12 +311 2008-04-08 12 +316 
2008-04-08 12 +302 2008-04-08 12 +205 2008-04-08 12 +438 2008-04-08 12 +345 2008-04-08 12 +489 2008-04-08 12 +378 2008-04-08 12 +221 2008-04-08 12 +280 2008-04-08 12 +427 2008-04-08 12 +277 2008-04-08 12 +208 2008-04-08 12 +356 2008-04-08 12 +399 2008-04-08 12 +382 2008-04-08 12 +498 2008-04-08 12 +386 2008-04-08 12 +437 2008-04-08 12 +469 2008-04-08 12 +286 2008-04-08 12 +459 2008-04-08 12 +239 2008-04-08 12 +213 2008-04-08 12 +216 2008-04-08 12 +430 2008-04-08 12 +278 2008-04-08 12 +289 2008-04-08 12 +221 2008-04-08 12 +318 2008-04-08 12 +332 2008-04-08 12 +311 2008-04-08 12 +275 2008-04-08 12 +241 2008-04-08 12 +333 2008-04-08 12 +284 2008-04-08 12 +230 2008-04-08 12 +260 2008-04-08 12 +404 2008-04-08 12 +384 2008-04-08 12 +489 2008-04-08 12 +353 2008-04-08 12 +373 2008-04-08 12 +272 2008-04-08 12 +217 2008-04-08 12 +348 2008-04-08 12 +466 2008-04-08 12 +411 2008-04-08 12 +230 2008-04-08 12 +208 2008-04-08 12 +348 2008-04-08 12 +463 2008-04-08 12 +431 2008-04-08 12 +496 2008-04-08 12 +322 2008-04-08 12 +468 2008-04-08 12 +393 2008-04-08 12 +454 2008-04-08 12 +298 2008-04-08 12 +418 2008-04-08 12 +327 2008-04-08 12 +230 2008-04-08 12 +205 2008-04-08 12 +404 2008-04-08 12 +436 2008-04-08 12 +469 2008-04-08 12 +468 2008-04-08 12 +308 2008-04-08 12 +288 2008-04-08 12 +481 2008-04-08 12 +457 2008-04-08 12 +282 2008-04-08 12 +318 2008-04-08 12 +318 2008-04-08 12 +409 2008-04-08 12 +470 2008-04-08 12 +369 2008-04-08 12 +316 2008-04-08 12 +413 2008-04-08 12 +490 2008-04-08 12 +364 2008-04-08 12 +395 2008-04-08 12 +282 2008-04-08 12 +238 2008-04-08 12 +419 2008-04-08 12 +307 2008-04-08 12 +435 2008-04-08 12 +277 2008-04-08 12 +273 2008-04-08 12 +306 2008-04-08 12 +224 2008-04-08 12 +309 2008-04-08 12 +389 2008-04-08 12 +327 2008-04-08 12 +242 2008-04-08 12 +369 2008-04-08 12 +392 2008-04-08 12 +272 2008-04-08 12 +331 2008-04-08 12 +401 2008-04-08 12 +242 2008-04-08 12 +452 2008-04-08 12 +226 2008-04-08 12 +497 2008-04-08 12 +402 2008-04-08 12 +396 2008-04-08 12 +317 2008-04-08 12 +395 2008-04-08 12 +336 2008-04-08 12 +229 2008-04-08 12 +233 2008-04-08 12 +472 2008-04-08 12 +322 2008-04-08 12 +498 2008-04-08 12 +321 2008-04-08 12 +430 2008-04-08 12 +489 2008-04-08 12 +458 2008-04-08 12 +223 2008-04-08 12 +492 2008-04-08 12 +449 2008-04-08 12 +218 2008-04-08 12 +228 2008-04-08 12 +453 2008-04-08 12 +209 2008-04-08 12 +468 2008-04-08 12 +342 2008-04-08 12 +230 2008-04-08 12 +368 2008-04-08 12 +296 2008-04-08 12 +216 2008-04-08 12 +367 2008-04-08 12 +344 2008-04-08 12 +274 2008-04-08 12 +219 2008-04-08 12 +239 2008-04-08 12 +485 2008-04-08 12 +223 2008-04-08 12 +256 2008-04-08 12 +263 2008-04-08 12 +487 2008-04-08 12 +480 2008-04-08 12 +401 2008-04-08 12 +288 2008-04-08 12 +244 2008-04-08 12 +438 2008-04-08 12 +467 2008-04-08 12 +432 2008-04-08 12 +202 2008-04-08 12 +316 2008-04-08 12 +229 2008-04-08 12 +469 2008-04-08 12 +463 2008-04-08 12 +280 2008-04-08 12 +283 2008-04-08 12 +331 2008-04-08 12 +235 2008-04-08 12 +321 2008-04-08 12 +335 2008-04-08 12 +466 2008-04-08 12 +366 2008-04-08 12 +403 2008-04-08 12 +483 2008-04-08 12 +257 2008-04-08 12 +406 2008-04-08 12 +409 2008-04-08 12 +406 2008-04-08 12 +401 2008-04-08 12 +258 2008-04-08 12 +203 2008-04-08 12 +262 2008-04-08 12 +348 2008-04-08 12 +424 2008-04-08 12 +396 2008-04-08 12 +201 2008-04-08 12 +217 2008-04-08 12 +431 2008-04-08 12 +454 2008-04-08 12 +478 2008-04-08 12 +298 2008-04-08 12 +431 2008-04-08 12 +424 2008-04-08 12 +382 2008-04-08 12 +397 2008-04-08 12 +480 2008-04-08 12 +291 2008-04-08 12 +351 2008-04-08 12 +255 2008-04-08 12 +438 
2008-04-08 12 +414 2008-04-08 12 +200 2008-04-08 12 +491 2008-04-08 12 +237 2008-04-08 12 +439 2008-04-08 12 +360 2008-04-08 12 +248 2008-04-08 12 +479 2008-04-08 12 +305 2008-04-08 12 +417 2008-04-08 12 +444 2008-04-08 12 +429 2008-04-08 12 +443 2008-04-08 12 +323 2008-04-08 12 +325 2008-04-08 12 +277 2008-04-08 12 +230 2008-04-08 12 +478 2008-04-08 12 +468 2008-04-08 12 +310 2008-04-08 12 +317 2008-04-08 12 +333 2008-04-08 12 +493 2008-04-08 12 +460 2008-04-08 12 +207 2008-04-08 12 +249 2008-04-08 12 +265 2008-04-08 12 +480 2008-04-08 12 +353 2008-04-08 12 +214 2008-04-08 12 +462 2008-04-08 12 +233 2008-04-08 12 +406 2008-04-08 12 +454 2008-04-08 12 +375 2008-04-08 12 +401 2008-04-08 12 +421 2008-04-08 12 +407 2008-04-08 12 +384 2008-04-08 12 +256 2008-04-08 12 +384 2008-04-08 12 +379 2008-04-08 12 +462 2008-04-08 12 +492 2008-04-08 12 +298 2008-04-08 12 +341 2008-04-08 12 +498 2008-04-08 12 +458 2008-04-08 12 +362 2008-04-08 12 +285 2008-04-08 12 +348 2008-04-08 12 +273 2008-04-08 12 +281 2008-04-08 12 +344 2008-04-08 12 +469 2008-04-08 12 +315 2008-04-08 12 +448 2008-04-08 12 +348 2008-04-08 12 +307 2008-04-08 12 +414 2008-04-08 12 +477 2008-04-08 12 +222 2008-04-08 12 +403 2008-04-08 12 +400 2008-04-08 12 +200 2008-04-08 12 diff --git ql/src/test/results/clientpositive/input39.q.out ql/src/test/results/clientpositive/input39.q.out index 9a84aa5..9adbafa 100644 --- ql/src/test/results/clientpositive/input39.q.out +++ ql/src/test/results/clientpositive/input39.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: create table t1(key string, value string) partitioned by (ds string) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table t1(key string, value string) partitioned by (ds string) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@t1 PREHOOK: query: create table t2(key string, value string) partitioned by (ds string) @@ -186,4 +192,5 @@ POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:va POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 18 -mapred.job.tracker=does.notexist.com:666 +mapreduce.framework.name=yarn +mapreduce.jobtracker.address=local diff --git ql/src/test/results/clientpositive/input39_hadoop20.q.out ql/src/test/results/clientpositive/input39_hadoop20.q.out new file mode 100644 index 0000000..5a587fc --- /dev/null +++ ql/src/test/results/clientpositive/input39_hadoop20.q.out @@ -0,0 +1,195 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2(key string, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table t2(key string, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t2 +PREHOOK: query: insert overwrite table t1 partition (ds='1') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: 
Input: default@src +PREHOOK: Output: default@t1@ds=1 +POSTHOOK: query: insert overwrite table t1 partition (ds='1') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1@ds=1 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table t1 partition (ds='2') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1@ds=2 +POSTHOOK: query: insert overwrite table t1 partition (ds='2') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1@ds=2 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table t2 partition (ds='1') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t2@ds=1 +POSTHOOK: query: insert overwrite table t2 partition (ds='1') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t2@ds=1 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1)) (TOK_TABREF (TOK_TABNAME t2)) (= (. (TOK_TABLE_OR_COL t1) key) (. 
(TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL t1) ds) '1') (= (. (TOK_TABLE_OR_COL t2) ds) '1'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + Filter Operator + predicate: + expr: (((hash(rand(460476415)) & 2147483647) % 32) = 0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: ds + type: string + t2 + TableScan + alias: t2 + Filter Operator + predicate: + expr: (((hash(rand(460476415)) & 2147483647) % 32) = 0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: ds + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col2} + 1 {VALUE._col2} + handleSkewJoin: false + outputColumnNames: _col2, _col7 + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' +PREHOOK: type: QUERY +PREHOOK: Input: default@t1@ds=1 +PREHOOK: Input: default@t2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1@ds=1 +POSTHOOK: Input: default@t2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +18 +mapred.job.tracker=localhost:58 diff --git 
ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out index d623d9f..6e40a4a 100644 --- ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out +++ ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out @@ -1,11 +1,11 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: -- create file inputs +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +-- create file inputs create table sih_i_part (key int, value string) partitioned by (p string) PREHOOK: type: CREATETABLE -POSTHOOK: query: -- create file inputs +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +-- create file inputs create table sih_i_part (key int, value string) partitioned by (p string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@sih_i_part @@ -75,29 +75,22 @@ POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchem POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. --- Sample split, running locally limited by num tasks +PREHOOK: query: -- sample split, running locally limited by num tasks select count(1) from sih_src tablesample(1 percent) PREHOOK: type: QUERY PREHOOK: Input: default@sih_src #### A masked pattern was here #### -1500 +25 PREHOOK: query: -- sample two tables -select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key +select count(1) from sih_src tablesample(1 percent) a join sih_src2 tablesample(1 percent) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@sih_src PREHOOK: Input: default@sih_src2 #### A masked pattern was here #### -3084 +49 PREHOOK: query: -- sample split, running locally limited by max bytes select count(1) from sih_src tablesample(1 percent) PREHOOK: type: QUERY PREHOOK: Input: default@sih_src #### A masked pattern was here #### -1500 +25 diff --git ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out new file mode 100644 index 0000000..c74812b --- /dev/null +++ ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out @@ -0,0 +1,112 @@ +PREHOOK: query: USE default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. 
+-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create file inputs +create table sih_i_part (key int, value string) partitioned by (p string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create file inputs +create table sih_i_part (key int, value string) partitioned by (p string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@sih_i_part +PREHOOK: query: insert overwrite table sih_i_part partition (p='1') select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@sih_i_part@p=1 +POSTHOOK: query: insert overwrite table sih_i_part partition (p='1') select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@sih_i_part@p=1 +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table sih_i_part partition (p='2') select key+10000, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@sih_i_part@p=2 +POSTHOOK: query: insert overwrite table sih_i_part partition (p='2') select key+10000, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@sih_i_part@p=2 +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table sih_i_part partition (p='3') select key+20000, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@sih_i_part@p=3 +POSTHOOK: query: insert overwrite table sih_i_part partition (p='3') select key+20000, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@sih_i_part@p=3 +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table sih_src as select key, value from sih_i_part order by key, value +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@sih_i_part@p=1 +PREHOOK: Input: default@sih_i_part@p=2 +PREHOOK: Input: default@sih_i_part@p=3 +POSTHOOK: query: create table sih_src as select key, value from sih_i_part order by key, value +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@sih_i_part@p=1 +POSTHOOK: Input: default@sih_i_part@p=2 +POSTHOOK: Input: default@sih_i_part@p=3 +POSTHOOK: Output: default@sih_src +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table sih_src2 as select key, value from sih_src order by key, value +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@sih_src +POSTHOOK: query: create table sih_src2 as select key, value from sih_src order by key, value +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@sih_src +POSTHOOK: Output: default@sih_src2 +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Sample split, running locally limited by num tasks +select count(1) from sih_src tablesample(1 percent) +PREHOOK: type: QUERY +PREHOOK: Input: default@sih_src +#### A masked pattern was here #### +1500 +PREHOOK: query: -- sample two tables +select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@sih_src +PREHOOK: Input: default@sih_src2 +#### A masked pattern was here #### +3084 +PREHOOK: query: -- sample split, running locally limited by max bytes +select count(1) from sih_src tablesample(1 percent) +PREHOOK: type: QUERY +PREHOOK: Input: default@sih_src +#### A masked pattern was here #### +1500 diff --git ql/src/test/results/clientpositive/split_sample.q.out ql/src/test/results/clientpositive/split_sample.q.out index 
4ed8528..818435d 100644 --- ql/src/test/results/clientpositive/split_sample.q.out +++ ql/src/test/results/clientpositive/split_sample.q.out @@ -1,11 +1,23 @@ -PREHOOK: query: USE default -PREHOOK: type: SWITCHDATABASE -POSTHOOK: query: USE default -POSTHOOK: type: SWITCHDATABASE -PREHOOK: query: -- create multiple file inputs (two enable multiple splits) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create multiple file inputs (two enable multiple splits) create table ss_i_part (key int, value string) partitioned by (p string) PREHOOK: type: CREATETABLE -POSTHOOK: query: -- create multiple file inputs (two enable multiple splits) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create multiple file inputs (two enable multiple splits) create table ss_i_part (key int, value string) partitioned by (p string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@ss_i_part @@ -62,27 +74,11 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. - -select count(1) from ss_src2 tablesample(1 percent) +PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) PREHOOK: type: QUERY PREHOOK: Input: default@ss_src2 #### A masked pattern was here #### -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.22) --- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 --- in an attempt to force the generation of multiple splits and multiple output files. --- However, Hadoop 0.20 is incapable of generating splits smaller than the block size --- when using CombineFileInputFormat, so only one split is generated. This has a --- significant impact on the results of the TABLESAMPLE(x PERCENT). 
This issue was --- fixed in MAPREDUCE-2046 which is included in 0.22. - -select count(1) from ss_src2 tablesample(1 percent) +POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) POSTHOOK: type: QUERY POSTHOOK: Input: default@ss_src2 #### A masked pattern was here #### @@ -92,7 +88,7 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 +20 PREHOOK: query: -- sample first split desc ss_src2 PREHOOK: type: DESCTABLE @@ -169,16 +165,16 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 val_238 -86 val_86 +17 val_17 +113 val_113 +155 val_155 +203 val_203 +339 val_339 +0 val_0 +455 val_455 +128 val_128 311 val_311 -27 val_27 -165 val_165 -409 val_409 -255 val_255 -278 val_278 -98 val_98 -484 val_484 +316 val_316 PREHOOK: query: -- verify seed number of sampling insert overwrite table ss_i_part partition (p='1') select key+10000, value from src PREHOOK: type: QUERY @@ -339,7 +335,7 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -320 +814 PREHOOK: query: -- sample more than one split explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 PREHOOK: type: QUERY @@ -398,7 +394,7 @@ STAGE PLANS: Percentage Sample: ss_src2 percentage: 70.0 - seed number: 5 + seed number: 7 Reduce Operator Tree: Group By Operator aggregations: @@ -444,7 +440,7 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -309 +262 PREHOOK: query: -- sample all splits select count(1) from ss_src2 tablesample(100 percent) PREHOOK: type: QUERY @@ -515,7 +511,7 @@ STAGE PLANS: Percentage Sample: subq:ss_src2 percentage: 1.0 - seed number: 5 + seed number: 7 Reduce Operator Tree: Extract Limit @@ -556,16 +552,16 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -238 -86 -311 -27 -165 -409 -255 -278 -98 -484 +469 +156 +436 +43 +404 +51 +131 +120 +205 +230 PREHOOK: query: -- groupby select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key PREHOOK: type: QUERY @@ -588,315 +584,26 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 3 -2 1 -4 1 -5 3 -8 1 -9 1 -10 1 -11 1 -12 2 -15 2 -17 1 -18 2 -19 1 -20 1 -24 2 -26 2 -27 1 -28 1 -30 1 -33 1 -34 1 -35 3 -37 2 -41 1 -42 2 43 1 -44 1 -47 1 -51 2 -53 1 -54 1 -57 1 -58 2 -64 1 -65 1 -66 1 -67 2 -69 1 -70 3 -72 2 -74 1 -76 2 -77 1 -78 1 -80 1 -82 1 -83 2 -84 2 -85 1 -86 1 -87 1 -90 3 -92 1 -95 2 -96 1 -97 2 -98 2 -100 2 -103 2 -104 2 -105 1 -111 1 -113 2 -114 1 -116 1 -118 2 -119 3 -120 2 -125 2 -126 1 -128 3 -129 2 +51 1 +95 1 +98 1 +120 1 131 1 -133 1 -134 2 -136 1 -137 2 -138 4 -143 1 -145 1 -146 2 -149 2 -150 1 -152 2 -153 1 -155 1 156 1 -157 1 -158 1 -160 1 -162 1 -163 1 -164 2 -165 2 -166 1 -167 3 -168 1 -169 4 -170 1 -172 2 -174 2 -175 2 -176 2 -177 1 -178 1 -179 2 -180 1 -181 1 -183 1 -186 1 -187 3 -189 1 -190 1 -191 2 -192 1 -193 3 -194 1 -195 2 196 1 -197 2 -199 3 -200 2 -201 1 -202 1 -203 2 -205 2 -207 2 -208 3 -209 2 -213 2 -214 1 -216 2 -217 2 -218 1 -219 2 -221 2 -222 1 -223 2 -224 2 -226 1 -228 1 -229 2 -230 5 -233 2 -235 1 -237 2 -238 2 -239 2 -241 1 -242 2 -244 1 -247 1 -248 1 -249 1 -252 1 -255 2 -256 2 -257 1 -258 1 -260 1 -262 1 -263 1 -265 2 -266 1 -272 2 -273 3 -274 1 -275 1 -277 4 -278 2 -280 2 -281 2 -282 2 -283 1 -284 1 -285 1 -286 1 -287 1 -288 2 -289 1 -291 1 -292 1 -296 1 -298 3 -302 1 -305 1 -306 1 -307 2 +197 1 +205 1 +230 1 +282 1 +288 1 308 1 -309 2 -310 1 -311 3 -315 1 -316 3 -317 2 -318 3 -321 2 -322 2 -323 1 -325 2 -327 3 -331 2 -332 1 -333 2 -335 1 -336 1 -338 1 -339 1 -341 1 -342 2 -344 2 -345 1 -348 5 -351 1 -353 2 -356 1 -360 1 -362 1 -364 1 -365 1 -366 1 -367 2 -368 1 -369 3 -373 1 -374 1 -375 1 -377 1 -378 1 -379 1 -382 2 -384 3 -386 1 -389 1 -392 1 -393 1 -394 1 -395 2 -396 3 -397 2 -399 2 -400 1 -401 5 -402 1 -403 3 -404 2 -406 4 -407 1 -409 3 -411 1 -413 2 -414 2 -417 3 -418 1 -419 1 -421 1 -424 2 -427 1 -429 2 -430 3 -431 3 -432 1 -435 1 +404 1 436 1 -437 1 -438 3 -439 2 -443 1 -444 1 -446 1 -448 1 -449 1 -452 1 -453 1 -454 3 -455 1 457 1 -458 2 -459 2 -460 1 -462 2 -463 2 -466 3 -467 1 -468 4 -469 5 -470 1 -472 1 -475 1 -477 1 -478 2 -479 1 -480 3 +468 1 +469 1 481 1 -482 1 -483 1 -484 1 -485 1 -487 1 -489 4 -490 1 -491 1 -492 2 -493 1 -494 1 -495 1 -496 1 -497 1 -498 3 PREHOOK: query: -- sample one of two tables: create table ss_src1 as select * from ss_src2 PREHOOK: type: CREATETABLE_AS_SELECT @@ -940,1041 +647,39 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: 
Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -2 -2 -2 -4 -4 -4 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -5 -8 -8 -8 -9 -9 -9 -10 -10 -10 -11 -11 -11 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -12 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -15 -17 -17 -17 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -18 -19 -19 -19 -20 -20 -20 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -24 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -26 -27 -27 -27 -28 -28 -28 -30 -30 -30 -33 -33 -33 -34 -34 -34 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -35 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -37 -41 -41 -41 -42 -42 -42 -42 -42 -42 -42 -42 -42 -42 -42 -42 43 43 43 -44 -44 -44 -47 -47 -47 -51 -51 -51 51 51 51 51 51 51 -51 -51 -51 -53 -53 -53 -54 -54 -54 -57 -57 -57 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -58 -64 -64 -64 -65 -65 -65 -66 -66 -66 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -67 -69 -69 -69 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -70 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -72 -74 -74 -74 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -76 -77 -77 -77 -78 -78 -78 -80 -80 -80 -82 -82 -82 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -83 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -84 -85 -85 -85 -86 -86 -86 -87 -87 -87 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -90 -92 -92 -92 -95 -95 -95 -95 -95 95 95 95 95 95 95 -95 -96 -96 -96 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -97 -98 -98 -98 98 98 98 98 98 98 -98 -98 -98 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -100 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -103 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -104 -105 -105 -105 -111 -111 -111 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -113 -114 -114 -114 -116 -116 -116 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -118 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -119 -120 120 120 120 120 120 120 -120 -120 -120 -120 -120 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -125 -126 -126 -126 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -128 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 -129 131 131 131 -133 -133 -133 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -134 -136 -136 -136 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -137 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -138 -143 -143 -143 -145 -145 -145 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -146 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -149 -150 -150 -150 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -152 -153 -153 -153 -155 -155 -155 156 156 156 -157 -157 -157 -158 -158 -158 -160 -160 -160 -162 -162 -162 -163 -163 -163 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -164 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -165 -166 -166 -166 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 
-167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -167 -168 -168 -168 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -169 -170 -170 -170 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -172 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -174 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -175 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -176 -177 -177 -177 -178 -178 -178 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -179 -180 -180 -180 -181 -181 -181 -183 -183 -183 -186 -186 -186 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -187 -189 -189 -189 -190 -190 -190 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -191 -192 -192 -192 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -193 -194 -194 -194 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 -195 196 196 196 @@ -1984,291 +689,12 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(n 197 197 197 -197 -197 -197 -197 -197 -197 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -199 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -200 -201 -201 -201 -202 -202 -202 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -203 -205 -205 -205 -205 -205 -205 205 205 205 205 205 205 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -207 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -208 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -209 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -213 -214 -214 -214 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -216 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -217 -218 -218 -218 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -219 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -221 -222 -222 -222 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -223 -224 -224 -224 -224 -224 -224 -224 -224 -224 -224 -224 -224 -226 -226 -226 -228 -228 -228 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -229 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 230 230 230 @@ -2284,1475 +710,33 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(n 230 230 230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -230 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -233 -235 -235 -235 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -237 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -238 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -239 -241 -241 -241 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -242 -244 -244 -244 -247 -247 -247 -248 -248 -248 -249 -249 -249 -252 -252 -252 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -255 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -256 -257 -257 -257 -258 -258 -258 -260 -260 -260 -262 -262 -262 -263 -263 -263 -265 -265 -265 -265 
-265 -265 -265 -265 -265 -265 -265 -265 -266 -266 -266 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -272 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -273 -274 -274 -274 -275 -275 -275 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -277 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -278 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -280 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -281 -282 -282 282 282 282 282 282 282 -282 -282 -282 -282 -283 -283 -283 -284 -284 -284 -285 -285 -285 -286 -286 -286 -287 -287 -287 -288 -288 -288 -288 288 288 288 288 288 288 -288 -288 -289 -289 -289 -291 -291 -291 -292 -292 -292 -296 -296 -296 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -298 -302 -302 -302 -305 -305 -305 -306 -306 -306 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 -307 308 308 308 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -309 -310 -310 -310 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -311 -315 -315 -315 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -316 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -317 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -318 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -321 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -322 -323 -323 -323 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -325 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -327 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -331 -332 -332 -332 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -333 -335 -335 -335 -336 -336 -336 -338 -338 -338 -339 -339 -339 -341 -341 -341 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -342 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -344 -345 -345 -345 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -348 -351 -351 -351 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -353 -356 -356 -356 -360 -360 -360 -362 -362 -362 -364 -364 -364 -365 -365 -365 -366 -366 -366 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -367 -368 -368 -368 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -369 -373 -373 -373 -374 -374 -374 -375 -375 -375 -377 -377 -377 -378 -378 -378 -379 -379 -379 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -382 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -384 -386 -386 -386 -389 -389 -389 -392 -392 -392 -393 -393 -393 -394 -394 -394 -395 -395 -395 -395 
-395 -395 -395 -395 -395 -395 -395 -395 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -396 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -397 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -399 -400 -400 -400 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -401 -402 -402 -402 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -403 -404 -404 -404 -404 -404 -404 404 404 404 404 404 404 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -406 -407 -407 -407 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -409 -411 -411 -411 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -413 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -414 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -417 -418 -418 -418 -419 -419 -419 -421 -421 -421 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -424 -427 -427 -427 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -429 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -430 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -431 -432 -432 -432 -435 -435 -435 436 436 436 -437 -437 -437 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -438 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -439 -443 -443 -443 -444 -444 -444 -446 -446 -446 -448 -448 -448 -449 -449 -449 -452 -452 -452 -453 -453 -453 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -454 -455 -455 -455 457 457 457 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -458 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -459 -460 -460 -460 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -462 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -463 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -466 -467 -467 -467 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 -468 468 468 468 @@ -3765,70 +749,6 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(n 468 468 468 -468 -468 -468 -468 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 -469 469 469 469 @@ -3844,186 
+764,9 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(n 469 469 469 -470 -470 -470 -472 -472 -472 -475 -475 -475 -477 -477 -477 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -478 -479 -479 -479 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 -480 481 481 481 -482 -482 -482 -483 -483 -483 -484 -484 -484 -485 -485 -485 -487 -487 -487 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -489 -490 -490 -490 -491 -491 -491 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -492 -493 -493 -493 -494 -494 -494 -495 -495 -495 -496 -496 -496 -497 -497 -497 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 -498 PREHOOK: query: -- sample two tables explain select * from ( select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key @@ -4090,10 +833,10 @@ STAGE PLANS: Percentage Sample: subq:t1 percentage: 80.0 - seed number: 5 + seed number: 7 subq:t2 percentage: 2.0 - seed number: 5 + seed number: 7 Reduce Operator Tree: Join Operator condition map: @@ -4159,33 +902,15 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 -199 199 +199 NULL +199 NULL +199 NULL +199 NULL +199 NULL +199 NULL +199 NULL +199 NULL +199 NULL PREHOOK: query: -- shrink last split explain select count(1) from ss_src2 tablesample(1 percent) PREHOOK: type: QUERY @@ -4234,7 +959,7 @@ STAGE PLANS: Percentage Sample: ss_src2 percentage: 1.0 - seed number: 5 + seed number: 7 Reduce Operator Tree: Group By Operator aggregations: @@ -4279,7 +1004,7 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -500 +52 PREHOOK: query: select count(1) from ss_src2 tablesample(50 percent) PREHOOK: type: QUERY PREHOOK: Input: default@ss_src2 @@ -4300,4 +1025,4 @@ POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] -1000 +774 diff --git ql/src/test/results/clientpositive/split_sample_hadoop20.q.out ql/src/test/results/clientpositive/split_sample_hadoop20.q.out new file mode 100644 index 0000000..a869a74 --- /dev/null +++ ql/src/test/results/clientpositive/split_sample_hadoop20.q.out @@ -0,0 +1,4303 @@ +PREHOOK: query: USE default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create multiple file inputs (two enable multiple splits) +create table ss_i_part (key int, value string) partitioned by (p string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create multiple file inputs (two enable multiple splits) +create table ss_i_part (key int, value string) partitioned by (p string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@ss_i_part +PREHOOK: query: insert overwrite table ss_i_part partition (p='1') select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@ss_i_part@p=1 +POSTHOOK: query: insert overwrite table ss_i_part partition (p='1') select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ss_i_part@p=1 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table ss_i_part partition (p='2') select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@ss_i_part@p=2 +POSTHOOK: query: insert overwrite table ss_i_part partition (p='2') select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ss_i_part@p=2 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table ss_i_part partition (p='3') select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: 
default@src +PREHOOK: Output: default@ss_i_part@p=3 +POSTHOOK: query: insert overwrite table ss_i_part partition (p='3') select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ss_i_part@p=3 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table ss_src2 as select key, value from ss_i_part +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@ss_i_part@p=1 +PREHOOK: Input: default@ss_i_part@p=2 +PREHOOK: Input: default@ss_i_part@p=3 +POSTHOOK: query: create table ss_src2 as select key, value from ss_i_part +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@ss_i_part@p=1 +POSTHOOK: Input: default@ss_i_part@p=2 +POSTHOOK: Input: default@ss_i_part@p=3 +POSTHOOK: Output: default@ss_src2 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +500 +PREHOOK: query: -- sample first split +desc ss_src2 +PREHOOK: type: DESCTABLE +POSTHOOK: query: -- sample first split +desc ss_src2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +key int +value string +PREHOOK: query: explain select key, value from ss_src2 tablesample(1 percent) limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value from ss_src2 tablesample(1 percent) limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + ss_src2 + TableScan + alias: ss_src2 + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + Percentage Sample: + ss_src2 + percentage: 1.0 + seed number: 0 + + Stage: Stage-0 + Fetch Operator + limit: 10 + + +PREHOOK: query: select key, value from ss_src2 tablesample(1 percent) limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from ss_src2 tablesample(1 percent) limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +409 val_409 +255 val_255 +278 val_278 +98 val_98 +484 val_484 +PREHOOK: query: -- verify seed number of sampling +insert overwrite table ss_i_part partition (p='1') select key+10000, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@ss_i_part@p=1 +POSTHOOK: query: -- verify seed number of sampling +insert overwrite table ss_i_part partition (p='1') select key+10000, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ss_i_part@p=1 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table ss_i_part partition (p='2') select key+20000, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@ss_i_part@p=2 +POSTHOOK: query: insert overwrite table ss_i_part partition (p='2') select key+20000, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ss_i_part@p=2 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table ss_i_part partition (p='3') select key+30000, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: 
default@ss_i_part@p=3 +POSTHOOK: query: insert overwrite table ss_i_part partition (p='3') select key+30000, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ss_i_part@p=3 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table ss_src3 as select key, value from ss_i_part +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@ss_i_part@p=1 +PREHOOK: Input: default@ss_i_part@p=2 +PREHOOK: Input: default@ss_i_part@p=3 +POSTHOOK: query: create table ss_src3 as select key, value from ss_i_part +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@ss_i_part@p=1 +POSTHOOK: Input: default@ss_i_part@p=2 +POSTHOOK: Input: default@ss_i_part@p=3 +POSTHOOK: Output: default@ss_src3 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@ss_src3 +POSTHOOK: query: create table ss_t3 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@ss_src3 +POSTHOOK: Output: default@ss_t3 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@ss_src3 +POSTHOOK: query: create table ss_t4 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@ss_src3 +POSTHOOK: Output: default@ss_t4 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@ss_src3 +POSTHOOK: query: create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@ss_src3 +POSTHOOK: Output: default@ss_t5 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_t3 +PREHOOK: Input: default@ss_t4 +PREHOOK: Input: default@ss_t5 +#### A masked pattern was here #### +POSTHOOK: query: select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_t3 +POSTHOOK: Input: default@ss_t4 +POSTHOOK: Input: default@ss_t5 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part 
PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +320 +PREHOOK: query: -- sample more than one split +explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- sample more than one split +explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 70))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL key)))) (TOK_LIMIT 10))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + ss_src2 + TableScan + alias: ss_src2 + Select Operator + expressions: + expr: key + type: int + outputColumnNames: key + Group By Operator + aggregations: + expr: count(DISTINCT key) + bucketGroup: false + keys: + expr: key + type: int + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + tag: -1 + value expressions: + expr: _col1 + type: bigint + Percentage Sample: + ss_src2 + percentage: 70.0 + seed number: 5 
+ Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col0:0._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 10 + + +PREHOOK: query: select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: select count(distinct key) from ss_src2 tablesample(70 percent) limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +309 +PREHOOK: query: -- sample all splits +select count(1) from ss_src2 tablesample(100 percent) +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: -- sample all splits +select count(1) from ss_src2 tablesample(100 percent) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part 
PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1500 +PREHOOK: query: -- subquery +explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq +PREHOOK: type: QUERY +POSTHOOK: query: -- subquery +explain select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq +POSTHOOK: type: QUERY +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq:ss_src2 + TableScan + alias: ss_src2 + Select Operator + expressions: + expr: key + type: int + outputColumnNames: _col0 + Limit + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: int + Percentage Sample: + subq:ss_src2 + percentage: 1.0 + seed number: 5 + Reduce Operator Tree: + Extract + Limit + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: select key from (select key from ss_src2 tablesample(1 percent) limit 10) subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 +86 +311 +27 +165 +409 +255 +278 +98 +484 +PREHOOK: query: -- groupby +select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: -- groupby +select key, count(1) from ss_src2 tablesample(1 percent) group by key order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 3 +2 1 +4 1 +5 3 +8 1 +9 1 +10 1 +11 1 +12 2 +15 2 +17 1 +18 2 +19 1 +20 1 +24 2 +26 2 +27 1 +28 1 +30 1 +33 1 +34 1 +35 3 +37 2 +41 1 +42 2 +43 1 +44 1 +47 1 +51 2 +53 1 +54 1 +57 1 +58 2 +64 1 +65 1 +66 1 +67 2 +69 1 +70 3 +72 2 +74 1 +76 2 +77 1 +78 1 +80 1 +82 1 +83 2 +84 2 +85 1 +86 1 +87 1 +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +100 2 +103 2 +104 2 +105 1 +111 1 +113 2 +114 1 +116 1 +118 2 +119 3 +120 2 +125 2 +126 1 +128 3 +129 2 +131 1 +133 1 +134 2 +136 1 +137 2 +138 4 +143 1 +145 1 +146 2 +149 2 +150 1 +152 2 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 2 +165 2 +166 1 +167 3 +168 1 +169 4 +170 1 +172 2 +174 2 +175 2 +176 2 +177 1 +178 1 +179 2 +180 1 +181 1 +183 1 +186 1 +187 3 +189 1 +190 1 +191 2 +192 1 +193 3 +194 1 +195 2 +196 1 +197 2 +199 3 +200 2 +201 1 +202 1 +203 2 +205 2 +207 2 +208 3 +209 2 +213 2 +214 1 +216 2 +217 2 +218 1 +219 2 +221 2 +222 1 +223 2 +224 2 +226 1 +228 1 +229 2 +230 5 +233 2 +235 1 +237 2 +238 2 +239 2 +241 1 +242 2 +244 1 +247 1 +248 1 +249 1 +252 1 +255 2 +256 2 +257 1 +258 1 +260 1 +262 1 +263 1 +265 2 +266 1 +272 2 +273 3 +274 1 +275 1 +277 4 +278 2 +280 2 +281 2 +282 2 +283 1 +284 1 +285 1 +286 1 +287 1 +288 2 +289 1 +291 1 +292 1 +296 1 +298 3 +302 1 +305 1 +306 1 +307 2 +308 1 +309 2 +310 1 +311 3 +315 1 +316 3 +317 2 +318 3 +321 2 +322 2 +323 1 +325 2 +327 3 +331 2 +332 1 +333 2 +335 1 +336 1 +338 1 +339 1 +341 1 +342 2 +344 2 +345 1 +348 5 +351 1 +353 2 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 2 +368 1 +369 3 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 2 +384 3 +386 1 +389 1 +392 1 +393 1 +394 1 +395 2 +396 3 +397 2 +399 2 +400 1 +401 5 +402 1 +403 3 +404 2 +406 4 +407 1 +409 3 +411 1 +413 2 +414 2 +417 3 +418 1 +419 1 +421 1 +424 2 +427 1 +429 2 +430 3 +431 3 +432 1 +435 1 +436 1 +437 1 +438 3 +439 2 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 3 +455 1 +457 1 +458 2 +459 2 +460 1 +462 2 +463 2 +466 3 +467 1 +468 4 +469 5 +470 1 +472 1 +475 1 +477 1 +478 2 +479 1 +480 3 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 4 +490 1 +491 1 +492 2 +493 1 +494 1 +495 1 +496 1 +497 1 +498 3 +PREHOOK: query: -- sample one of two tables: +create table ss_src1 as select * from ss_src2 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@ss_src2 +POSTHOOK: query: -- sample one of two tables: +create table ss_src1 as select * from ss_src2 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@ss_src2 +POSTHOOK: Output: default@ss_src1 +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: 
Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src1 +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: select t2.key as k from ss_src1 join ss_src2 tablesample(1 percent) t2 on ss_src1.key=t2.key order by k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src1 +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +2 +2 +2 +4 +4 +4 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +5 +8 +8 +8 +9 +9 +9 +10 +10 +10 +11 +11 +11 +12 +12 +12 +12 +12 +12 +12 +12 +12 +12 +12 +12 +15 +15 +15 +15 +15 +15 +15 +15 +15 +15 +15 +15 +17 +17 +17 +18 +18 +18 +18 +18 +18 +18 +18 +18 +18 +18 +18 +19 +19 +19 +20 +20 +20 +24 +24 +24 +24 +24 +24 +24 +24 +24 +24 +24 +24 +26 +26 +26 +26 +26 +26 +26 +26 +26 +26 +26 +26 +27 +27 +27 +28 +28 +28 +30 +30 +30 +33 +33 +33 +34 +34 +34 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +35 +37 +37 +37 +37 +37 +37 +37 
+37 +37 +37 +37 +37 +41 +41 +41 +42 +42 +42 +42 +42 +42 +42 +42 +42 +42 +42 +42 +43 +43 +43 +44 +44 +44 +47 +47 +47 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +51 +53 +53 +53 +54 +54 +54 +57 +57 +57 +58 +58 +58 +58 +58 +58 +58 +58 +58 +58 +58 +58 +64 +64 +64 +65 +65 +65 +66 +66 +66 +67 +67 +67 +67 +67 +67 +67 +67 +67 +67 +67 +67 +69 +69 +69 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +70 +72 +72 +72 +72 +72 +72 +72 +72 +72 +72 +72 +72 +74 +74 +74 +76 +76 +76 +76 +76 +76 +76 +76 +76 +76 +76 +76 +77 +77 +77 +78 +78 +78 +80 +80 +80 +82 +82 +82 +83 +83 +83 +83 +83 +83 +83 +83 +83 +83 +83 +83 +84 +84 +84 +84 +84 +84 +84 +84 +84 +84 +84 +84 +85 +85 +85 +86 +86 +86 +87 +87 +87 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +90 +92 +92 +92 +95 +95 +95 +95 +95 +95 +95 +95 +95 +95 +95 +95 +96 +96 +96 +97 +97 +97 +97 +97 +97 +97 +97 +97 +97 +97 +97 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +98 +100 +100 +100 +100 +100 +100 +100 +100 +100 +100 +100 +100 +103 +103 +103 +103 +103 +103 +103 +103 +103 +103 +103 +103 +104 +104 +104 +104 +104 +104 +104 +104 +104 +104 +104 +104 +105 +105 +105 +111 +111 +111 +113 +113 +113 +113 +113 +113 +113 +113 +113 +113 +113 +113 +114 +114 +114 +116 +116 +116 +118 +118 +118 +118 +118 +118 +118 +118 +118 +118 +118 +118 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +119 +120 +120 +120 +120 +120 +120 +120 +120 +120 +120 +120 +120 +125 +125 +125 +125 +125 +125 +125 +125 +125 +125 +125 +125 +126 +126 +126 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +128 +129 +129 +129 +129 +129 +129 +129 +129 +129 +129 +129 +129 +131 +131 +131 +133 +133 +133 +134 +134 +134 +134 +134 +134 +134 +134 +134 +134 +134 +134 +136 +136 +136 +137 +137 +137 +137 +137 +137 +137 +137 +137 +137 +137 +137 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +138 +143 +143 +143 +145 +145 +145 +146 +146 +146 +146 +146 +146 +146 +146 +146 +146 +146 +146 +149 +149 +149 +149 +149 +149 +149 +149 +149 +149 +149 +149 +150 +150 +150 +152 +152 +152 +152 +152 +152 +152 +152 +152 +152 +152 +152 +153 +153 +153 +155 +155 +155 +156 +156 +156 +157 +157 +157 +158 +158 +158 +160 +160 +160 +162 +162 +162 +163 +163 +163 +164 +164 +164 +164 +164 +164 +164 +164 +164 +164 +164 +164 +165 +165 +165 +165 +165 +165 +165 +165 +165 +165 +165 +165 +166 +166 +166 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +167 +168 +168 +168 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +169 +170 +170 +170 +172 +172 +172 +172 +172 +172 +172 +172 +172 +172 +172 +172 +174 +174 +174 +174 +174 +174 +174 +174 +174 +174 +174 +174 +175 +175 +175 +175 +175 +175 +175 +175 +175 +175 +175 +175 +176 +176 +176 +176 +176 +176 +176 +176 +176 +176 +176 +176 +177 +177 +177 +178 +178 +178 +179 +179 +179 +179 +179 +179 +179 +179 +179 +179 +179 +179 +180 +180 +180 +181 +181 +181 +183 +183 +183 +186 +186 +186 +187 +187 +187 +187 +187 +187 
+187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +187 +189 +189 +189 +190 +190 +190 +191 +191 +191 +191 +191 +191 +191 +191 +191 +191 +191 +191 +192 +192 +192 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +193 +194 +194 +194 +195 +195 +195 +195 +195 +195 +195 +195 +195 +195 +195 +195 +196 +196 +196 +197 +197 +197 +197 +197 +197 +197 +197 +197 +197 +197 +197 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +199 +200 +200 +200 +200 +200 +200 +200 +200 +200 +200 +200 +200 +201 +201 +201 +202 +202 +202 +203 +203 +203 +203 +203 +203 +203 +203 +203 +203 +203 +203 +205 +205 +205 +205 +205 +205 +205 +205 +205 +205 +205 +205 +207 +207 +207 +207 +207 +207 +207 +207 +207 +207 +207 +207 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +208 +209 +209 +209 +209 +209 +209 +209 +209 +209 +209 +209 +209 +213 +213 +213 +213 +213 +213 +213 +213 +213 +213 +213 +213 +214 +214 +214 +216 +216 +216 +216 +216 +216 +216 +216 +216 +216 +216 +216 +217 +217 +217 +217 +217 +217 +217 +217 +217 +217 +217 +217 +218 +218 +218 +219 +219 +219 +219 +219 +219 +219 +219 +219 +219 +219 +219 +221 +221 +221 +221 +221 +221 +221 +221 +221 +221 +221 +221 +222 +222 +222 +223 +223 +223 +223 +223 +223 +223 +223 +223 +223 +223 +223 +224 +224 +224 +224 +224 +224 +224 +224 +224 +224 +224 +224 +226 +226 +226 +228 +228 +228 +229 +229 +229 +229 +229 +229 +229 +229 +229 +229 +229 +229 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +230 +233 +233 +233 +233 +233 +233 +233 +233 +233 +233 +233 +233 +235 +235 +235 +237 +237 +237 +237 +237 +237 +237 +237 +237 +237 +237 +237 +238 +238 +238 +238 +238 +238 +238 +238 +238 +238 +238 +238 +239 +239 +239 +239 +239 +239 +239 +239 +239 +239 +239 +239 +241 +241 +241 +242 +242 +242 +242 +242 +242 +242 +242 +242 +242 +242 +242 +244 +244 +244 +247 +247 +247 +248 +248 +248 +249 +249 +249 +252 +252 +252 +255 +255 +255 +255 +255 +255 +255 +255 +255 +255 +255 +255 +256 +256 +256 +256 +256 +256 +256 +256 +256 +256 +256 +256 +257 +257 +257 +258 +258 +258 +260 +260 +260 +262 +262 +262 +263 +263 +263 +265 +265 +265 +265 +265 +265 +265 +265 +265 +265 +265 +265 +266 +266 +266 +272 +272 +272 +272 +272 +272 +272 +272 +272 +272 +272 +272 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +273 +274 +274 +274 +275 +275 +275 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +277 +278 +278 +278 +278 +278 +278 +278 +278 +278 +278 +278 +278 +280 +280 +280 +280 +280 +280 +280 +280 +280 +280 +280 +280 +281 +281 +281 +281 +281 +281 +281 +281 +281 +281 +281 +281 +282 +282 +282 +282 +282 +282 +282 +282 +282 +282 +282 +282 +283 +283 +283 +284 +284 +284 +285 +285 +285 +286 +286 +286 +287 +287 +287 +288 +288 +288 +288 +288 +288 +288 +288 +288 +288 +288 +288 +289 +289 +289 
+291 +291 +291 +292 +292 +292 +296 +296 +296 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +298 +302 +302 +302 +305 +305 +305 +306 +306 +306 +307 +307 +307 +307 +307 +307 +307 +307 +307 +307 +307 +307 +308 +308 +308 +309 +309 +309 +309 +309 +309 +309 +309 +309 +309 +309 +309 +310 +310 +310 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +311 +315 +315 +315 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +316 +317 +317 +317 +317 +317 +317 +317 +317 +317 +317 +317 +317 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +318 +321 +321 +321 +321 +321 +321 +321 +321 +321 +321 +321 +321 +322 +322 +322 +322 +322 +322 +322 +322 +322 +322 +322 +322 +323 +323 +323 +325 +325 +325 +325 +325 +325 +325 +325 +325 +325 +325 +325 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +327 +331 +331 +331 +331 +331 +331 +331 +331 +331 +331 +331 +331 +332 +332 +332 +333 +333 +333 +333 +333 +333 +333 +333 +333 +333 +333 +333 +335 +335 +335 +336 +336 +336 +338 +338 +338 +339 +339 +339 +341 +341 +341 +342 +342 +342 +342 +342 +342 +342 +342 +342 +342 +342 +342 +344 +344 +344 +344 +344 +344 +344 +344 +344 +344 +344 +344 +345 +345 +345 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +348 +351 +351 +351 +353 +353 +353 +353 +353 +353 +353 +353 +353 +353 +353 +353 +356 +356 +356 +360 +360 +360 +362 +362 +362 +364 +364 +364 +365 +365 +365 +366 +366 +366 +367 +367 +367 +367 +367 +367 +367 +367 +367 +367 +367 +367 +368 +368 +368 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +369 +373 +373 +373 +374 +374 +374 +375 +375 +375 +377 +377 +377 +378 +378 +378 +379 +379 +379 +382 +382 +382 +382 +382 +382 +382 +382 +382 +382 +382 +382 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +384 +386 +386 +386 +389 +389 +389 +392 +392 +392 +393 +393 +393 +394 +394 +394 +395 +395 +395 +395 +395 +395 +395 +395 +395 +395 +395 +395 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +396 +397 +397 +397 +397 +397 +397 +397 +397 +397 +397 +397 +397 +399 +399 +399 +399 +399 +399 +399 +399 +399 +399 +399 +399 +400 +400 +400 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +401 +402 +402 +402 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +403 +404 +404 +404 +404 +404 +404 +404 +404 +404 +404 +404 +404 
+406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +406 +407 +407 +407 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +409 +411 +411 +411 +413 +413 +413 +413 +413 +413 +413 +413 +413 +413 +413 +413 +414 +414 +414 +414 +414 +414 +414 +414 +414 +414 +414 +414 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +417 +418 +418 +418 +419 +419 +419 +421 +421 +421 +424 +424 +424 +424 +424 +424 +424 +424 +424 +424 +424 +424 +427 +427 +427 +429 +429 +429 +429 +429 +429 +429 +429 +429 +429 +429 +429 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +430 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +431 +432 +432 +432 +435 +435 +435 +436 +436 +436 +437 +437 +437 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +438 +439 +439 +439 +439 +439 +439 +439 +439 +439 +439 +439 +439 +443 +443 +443 +444 +444 +444 +446 +446 +446 +448 +448 +448 +449 +449 +449 +452 +452 +452 +453 +453 +453 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +454 +455 +455 +455 +457 +457 +457 +458 +458 +458 +458 +458 +458 +458 +458 +458 +458 +458 +458 +459 +459 +459 +459 +459 +459 +459 +459 +459 +459 +459 +459 +460 +460 +460 +462 +462 +462 +462 +462 +462 +462 +462 +462 +462 +462 +462 +463 +463 +463 +463 +463 +463 +463 +463 +463 +463 +463 +463 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +466 +467 +467 +467 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +468 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +469 +470 +470 +470 +472 +472 +472 +475 +475 +475 +477 +477 +477 +478 +478 +478 +478 +478 +478 +478 +478 +478 +478 +478 +478 +479 +479 +479 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +480 +481 +481 +481 +482 +482 +482 +483 +483 +483 +484 +484 +484 +485 +485 +485 +487 +487 +487 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +489 +490 +490 +490 +491 +491 +491 +492 +492 +492 +492 +492 +492 +492 +492 +492 +492 +492 +492 +493 +493 +493 +494 +494 +494 +495 +495 +495 +496 +496 +496 +497 +497 +497 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 +498 
+PREHOOK: query: -- sample two tables
+explain select * from (
+select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key
+) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- sample two tables
+explain select * from (
+select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key
+) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_FULLOUTERJOIN (TOK_TABREF (TOK_TABNAME ss_src1) (TOK_TABLESPLITSAMPLE 80) t1) (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 2) t2) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL t1) key) k1) (TOK_SELEXPR (.
(TOK_TABLE_OR_COL t2) key) k)))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (or (TOK_FUNCTION in (TOK_TABLE_OR_COL k) 199 10199 20199) (TOK_FUNCTION in (TOK_TABLE_OR_COL k1) 199 10199 20199))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + subq:t1 + TableScan + alias: t1 + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 0 + value expressions: + expr: key + type: int + subq:t2 + TableScan + alias: t2 + Reduce Output Operator + key expressions: + expr: key + type: int + sort order: + + Map-reduce partition columns: + expr: key + type: int + tag: 1 + value expressions: + expr: key + type: int + Percentage Sample: + subq:t1 + percentage: 80.0 + seed number: 5 + subq:t2 + percentage: 2.0 + seed number: 5 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col4 + Filter Operator + predicate: + expr: ((_col4) IN (199, 10199, 20199) or (_col0) IN (199, 10199, 20199)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col4 + type: int + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * from ( +select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key +) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src1 +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: select * from ( +select t1.key as k1, t2.key as k from ss_src1 tablesample(80 percent) t1 full outer join ss_src2 tablesample(2 percent) t2 on t1.key=t2.key +) subq where k in (199, 10199, 20199) or k1 in (199, 10199, 20199) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src1 +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: 
ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +199 199 +PREHOOK: query: -- shrink last split +explain select count(1) from ss_src2 tablesample(1 percent) +PREHOOK: type: QUERY +POSTHOOK: query: -- shrink last split +explain select count(1) from ss_src2 tablesample(1 percent) +POSTHOOK: type: QUERY +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME ss_src2) (TOK_TABLESPLITSAMPLE 1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + ss_src2 + TableScan + alias: ss_src2 + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Percentage Sample: + ss_src2 + percentage: 1.0 + seed number: 5 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(1) from ss_src2 tablesample(1 percent) +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from ss_src2 tablesample(1 percent) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +500 +PREHOOK: query: select count(1) from ss_src2 tablesample(50 percent) +PREHOOK: type: QUERY +PREHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from ss_src2 tablesample(50 percent) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ss_src2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: 
ss_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: ss_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+1000
diff --git service/src/test/org/apache/hadoop/hive/service/TestHiveServerSessions.java service/src/test/org/apache/hadoop/hive/service/TestHiveServerSessions.java
index 8c3fe34..fd38907 100644
--- service/src/test/org/apache/hadoop/hive/service/TestHiveServerSessions.java
+++ service/src/test/org/apache/hadoop/hive/service/TestHiveServerSessions.java
@@ -52,7 +52,7 @@ public class TestHiveServerSessions extends TestCase {
       }
     });
     server.start();
-    Thread.sleep(1000);
+    Thread.sleep(5000);

     for (int i = 0; i < transports.length ; i++) {
       TSocket transport = new TSocket("localhost", port);
diff --git shims/ivy.xml shims/ivy.xml
index 8667a71..dd0855f 100644
--- shims/ivy.xml
+++ shims/ivy.xml
@@ -38,12 +38,12 @@
            transitive="false"/>
-
+
@@ -90,8 +90,9 @@
-
+
@@ -115,6 +116,8 @@
+
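Editor's note on the TestHiveServerSessions hunk above: the patch only lengthens the fixed post-start sleep from 1000 ms to 5000 ms. Purely as an illustrative sketch, and not part of the patch, a test could instead poll the server's Thrift port until it accepts a connection. The helper class, method name, and timeout below are hypothetical; the sketch assumes the same org.apache.thrift.transport.TSocket the test already uses.

import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransportException;

// Hypothetical helper (not in the patch): block until the HiveServer Thrift
// port on localhost accepts a connection, instead of sleeping a fixed time.
final class ServerStartupWait {
  static void awaitServer(int port, long timeoutMillis) throws InterruptedException {
    long deadline = System.currentTimeMillis() + timeoutMillis;
    while (System.currentTimeMillis() < deadline) {
      TSocket probe = new TSocket("localhost", port);
      try {
        probe.open();       // succeeds only once the server is listening
        return;
      } catch (TTransportException e) {
        Thread.sleep(100);  // server not up yet; retry shortly
      } finally {
        if (probe.isOpen()) {
          probe.close();
        }
      }
    }
    throw new IllegalStateException("server did not start within " + timeoutMillis + " ms");
  }
}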