diff --git build-common.xml build-common.xml index d88bd84..40f96fc 100644 --- build-common.xml +++ build-common.xml @@ -55,11 +55,11 @@ - + - + @@ -86,11 +86,18 @@ - - + - + + + + + + + @@ -177,7 +184,9 @@ + @@ -300,6 +309,9 @@ + + + diff --git build.properties build.properties index 6995b67..157dc23 100644 --- build.properties +++ build.properties @@ -28,7 +28,7 @@ javac.args.warnings= hadoop-0.20.version=0.20.2 hadoop-0.20S.version=1.0.0 -hadoop-0.23.version=0.23.1 +hadoop-0.23.version=0.23.3 hadoop.version=${hadoop-0.20.version} hadoop.security.version=${hadoop-0.20S.version} hadoop.mirror=http://mirror.facebook.net/facebook/hive-deps diff --git common/ivy.xml common/ivy.xml index afc61d7..da90e9c 100644 --- common/ivy.xml +++ common/ivy.xml @@ -28,28 +28,28 @@ @@ -59,7 +59,7 @@ conf="hadoop23.compile->default" transitive="false" /> @@ -67,14 +67,14 @@ diff --git eclipse-templates/.classpath eclipse-templates/.classpath index b550bc5..b20c2ac 100644 --- eclipse-templates/.classpath +++ eclipse-templates/.classpath @@ -32,7 +32,7 @@ - + diff --git hbase-handler/ivy.xml hbase-handler/ivy.xml index 82258e3..9a12fce 100644 --- hbase-handler/ivy.xml +++ hbase-handler/ivy.xml @@ -46,5 +46,34 @@ transitive="false"/> + + + + + + + + + + + + + + + + + + + + + + diff --git ivy/libraries.properties ivy/libraries.properties index 13e6f29..9fab84b 100644 --- ivy/libraries.properties +++ ivy/libraries.properties @@ -40,7 +40,8 @@ commons-logging.version=1.0.4 commons-logging-api.version=1.0.4 commons-pool.version=1.5.4 derby.version=10.4.2.0 -guava.version=r09 +guava-hadoop20.version=r09 +guava-hadoop23.version=11.0.2 hbase.version=0.92.0 jackson.version=1.8.8 javaewah.version=0.3.2 diff --git ql/ivy.xml ql/ivy.xml index c408a79..c57b016 100644 --- ql/ivy.xml +++ ql/ivy.xml @@ -39,6 +39,12 @@ + + + + - @@ -82,5 +86,26 @@ + + + + + + + + + + + + + + + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/HadoopJobExecHelper.java ql/src/java/org/apache/hadoop/hive/ql/exec/HadoopJobExecHelper.java index e20cdd4..170f427 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/HadoopJobExecHelper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/HadoopJobExecHelper.java @@ -541,7 +541,11 @@ public class HadoopJobExecHelper { } // These tasks should have come from the same job. assert (ti.getJobId() != null && ti.getJobId().equals(jobId)); - ti.getLogUrls().add(getTaskAttemptLogUrl(t.getTaskTrackerHttp(), t.getTaskId())); + String taskAttemptLogUrl = ShimLoader.getHadoopShims().getTaskAttemptLogUrl( + conf, t.getTaskTrackerHttp(), t.getTaskId()); + if (taskAttemptLogUrl != null) { + ti.getLogUrls().add(taskAttemptLogUrl); + } // If a task failed, then keep track of the total number of failures // for that task (typically, a task gets re-run up to 4 times if it diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/JobDebugger.java ql/src/java/org/apache/hadoop/hive/ql/exec/JobDebugger.java index 85e1385..47e0703 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/JobDebugger.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JobDebugger.java @@ -146,7 +146,11 @@ public class JobDebugger implements Runnable { } // These tasks should have come from the same job. 
assert (ti.getJobId() != null && ti.getJobId().equals(jobId)); - ti.getLogUrls().add(getTaskAttemptLogUrl(t.getTaskTrackerHttp(), t.getTaskId())); + String taskAttemptLogUrl = ShimLoader.getHadoopShims().getTaskAttemptLogUrl( + conf, t.getTaskTrackerHttp(), t.getTaskId()); + if (taskAttemptLogUrl != null) { + ti.getLogUrls().add(taskAttemptLogUrl); + } // If a task failed, then keep track of the total number of failures // for that task (typically, a task gets re-run up to 4 times if it diff --git ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java index 1fcf2fe..ae40b6d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java +++ ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java @@ -45,6 +45,7 @@ import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileStatus; @@ -212,8 +213,25 @@ public class QTestUtil { conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, (new Path(dfs.getFileSystem().getUri().toString(), "/build/ql/test/data/warehouse/")).toString()); + int port = 0; + + try { + // Hadoop20 MiniMRCluster will return a proper port. + // Hadoop23 MiniMRCluster does not implement this method so use the default RM port. + port = mr.getJobTrackerPort(); + } catch (UnsupportedOperationException e) { + String address = + StringUtils.substringAfterLast(conf.get("yarn.resourcemanager.address"), ":"); + + if (StringUtils.isBlank(address)) { + throw new IllegalArgumentException("Invalid YARN resource manager port."); + } + + port = Integer.parseInt(address); + } + ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, - "localhost:" + mr.getJobTrackerPort()); + "localhost:" + port); } } @@ -284,43 +302,62 @@ public class QTestUtil { StringBuilder qsb = new StringBuilder(); // Look for a hint to not run a test on some Hadoop versions - Pattern pattern = Pattern.compile("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(.*)"); + Pattern pattern = Pattern.compile("-- (EX|IN)CLUDE_HADOOP_MAJOR_VERSIONS\\((.*)\\)"); // Read the entire query boolean excludeQuery = false; + boolean includeQuery = false; + Set versionSet = new HashSet(); String hadoopVer = ShimLoader.getMajorVersion(); while (dis.available() != 0) { String line = dis.readLine(); - // While we are reading the lines, detect whether this query wants to be - // excluded from running because the Hadoop version is incorrect + // Each qfile may include at most one INCLUDE or EXCLUDE directive. + // + // If a qfile contains an INCLUDE directive, and hadoopVer does + // not appear in the list of versions to include, then the qfile + // is skipped. + // + // If a qfile contains an EXCLUDE directive, and hadoopVer is + // listed in the list of versions to EXCLUDE, then the qfile is + // skipped. + // + // Otherwise, the qfile is included. 
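The directive handling that the QTestUtil hunk below implements can be summarized with a small, self-contained sketch (illustrative only, not the patch code itself; it assumes the qfile line and the Hadoop major version are handed in as plain strings and that a qfile carries at most one directive):

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class VersionDirectiveSketch {
  // Matches e.g. "-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20)" or "-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.23)".
  private static final Pattern DIRECTIVE =
      Pattern.compile("-- (EX|IN)CLUDE_HADOOP_MAJOR_VERSIONS\\((.*)\\)");

  /** Returns true if the qfile should be skipped for the given Hadoop major version. */
  static boolean shouldSkip(String qfileLine, String hadoopVer) {
    Matcher m = DIRECTIVE.matcher(qfileLine);
    if (!m.find()) {
      return false; // no directive: the qfile always runs
    }
    Set<String> versions = new HashSet<String>();
    for (String v : m.group(2).split(",")) {
      versions.add(v.trim());
    }
    boolean exclude = "EX".equals(m.group(1));
    // EXCLUDE skips when the version is listed; INCLUDE skips when it is not.
    return exclude ? versions.contains(hadoopVer) : !versions.contains(hadoopVer);
  }

  public static void main(String[] args) {
    System.out.println(shouldSkip("-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20)", "0.23")); // true
    System.out.println(shouldSkip("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)", "0.23")); // false
  }
}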
Matcher matcher = pattern.matcher(line); if (matcher.find()) { - String group = matcher.group(); - int start = group.indexOf('('); - int end = group.indexOf(')'); - assert end > start; - // versions might be something like '0.17, 0.19' - String versions = group.substring(start+1, end); - - Set excludedVersionSet = new HashSet(); - for (String s : versions.split("\\,")) { - s = s.trim(); - excludedVersionSet.add(s); + if (excludeQuery || includeQuery) { + String message = "QTestUtil: qfile " + qf.getName() + + " contains more than one reference to (EX|IN)CLUDE_HADOOP_MAJOR_VERSIONS"; + throw new UnsupportedOperationException(message); } - if (excludedVersionSet.contains(hadoopVer)) { + + String prefix = matcher.group(1); + if ("EX".equals(prefix)) { excludeQuery = true; + } else { + includeQuery = true; + } + + String versions = matcher.group(2); + for (String s : versions.split("\\,")) { + s = s.trim(); + versionSet.add(s); } } qsb.append(line + "\n"); } qMap.put(qf.getName(), qsb.toString()); - if(excludeQuery) { - System.out.println("Due to the Hadoop Version ("+ hadoopVer + "), " + - "adding query " + qf.getName() + " to the set of tests to skip"); + + if (excludeQuery && versionSet.contains(hadoopVer)) { + System.out.println("QTestUtil: " + qf.getName() + + " EXCLUDE list contains Hadoop Version " + hadoopVer + ". Skipping..."); qSkipSet.add(qf.getName()); - } + } else if (includeQuery && !versionSet.contains(hadoopVer)) { + System.out.println("QTestUtil: " + qf.getName() + + " INCLUDE list does not contain Hadoop Version " + hadoopVer + ". Skipping..."); + qSkipSet.add(qf.getName()); + } dis.close(); } @@ -521,6 +558,7 @@ public class QTestUtil { fpath = new Path(testFiles, "json.txt"); runLoadCmd("LOAD DATA LOCAL INPATH '" + fpath.toString() + "' INTO TABLE src_json"); + conf.setBoolean("hive.test.init.phase", false); } @@ -934,11 +972,14 @@ public class QTestUtil { ".*USING 'java -cp.*", "^Deleted.*", }; - maskPatterns(patterns, (new File(logDir, tname + ".out")).getPath()); + + File f = new File(logDir, tname + ".out"); + + maskPatterns(patterns, f.getPath()); cmdArray = new String[] { "diff", "-a", - (new File(logDir, tname + ".out")).getPath(), + f.getPath(), outFileName }; @@ -960,7 +1001,7 @@ public class QTestUtil { System.out.println("Overwriting results"); cmdArray = new String[3]; cmdArray[0] = "cp"; - cmdArray[1] = (new File(logDir, tname + ".out")).getPath(); + cmdArray[1] = f.getPath(); cmdArray[2] = outFileName; executor = Runtime.getRuntime().exec(cmdArray); exitVal = executor.waitFor(); diff --git ql/src/test/queries/clientnegative/autolocal1.q ql/src/test/queries/clientnegative/autolocal1.q index 6bee177..bd1c9d6 100644 --- ql/src/test/queries/clientnegative/autolocal1.q +++ ql/src/test/queries/clientnegative/autolocal1.q @@ -2,4 +2,14 @@ set mapred.job.tracker=abracadabra; set hive.exec.mode.local.auto.inputbytes.max=1; set hive.exec.mode.local.auto=true; +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- hadoop0.23 changes the behavior of JobClient initialization +-- in hadoop0.20, JobClient initialization tries to get JobTracker's address +-- this throws the expected IllegalArgumentException +-- in hadoop0.23, JobClient initialization only initializes cluster +-- and get user group information +-- not attempts to get JobTracker's address +-- no IllegalArgumentException thrown in JobClient Initialization +-- an exception is thrown when JobClient submitJob + SELECT key FROM src; diff --git ql/src/test/queries/clientnegative/local_mapred_error_cache.q 
ql/src/test/queries/clientnegative/local_mapred_error_cache.q index 8f4b37a..33bc4cd 100644 --- ql/src/test/queries/clientnegative/local_mapred_error_cache.q +++ ql/src/test/queries/clientnegative/local_mapred_error_cache.q @@ -1,4 +1,6 @@ set hive.exec.mode.local.auto=true; set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.VerifySessionStateLocalErrorsHook; +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + FROM src SELECT TRANSFORM(key, value) USING 'python ../data/scripts/cat_error.py' AS (key, value); diff --git ql/src/test/queries/clientnegative/local_mapred_error_cache_hadoop20.q ql/src/test/queries/clientnegative/local_mapred_error_cache_hadoop20.q new file mode 100644 index 0000000..fad3c94 --- /dev/null +++ ql/src/test/queries/clientnegative/local_mapred_error_cache_hadoop20.q @@ -0,0 +1,6 @@ +set hive.exec.mode.local.auto=true; +set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.VerifySessionStateLocalErrorsHook; + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +FROM src SELECT TRANSFORM(key, value) USING 'python ../data/scripts/cat_error.py' AS (key, value); diff --git ql/src/test/queries/clientnegative/mapreduce_stack_trace.q ql/src/test/queries/clientnegative/mapreduce_stack_trace.q index 37b86aa..6cc3717 100644 --- ql/src/test/queries/clientnegative/mapreduce_stack_trace.q +++ ql/src/test/queries/clientnegative/mapreduce_stack_trace.q @@ -2,4 +2,6 @@ set hive.exec.mode.local.auto=false; set hive.exec.job.debug.capture.stacktraces=true; set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.VerifySessionStateStackTracesHook; +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exist' AS (key, value); diff --git ql/src/test/queries/clientnegative/mapreduce_stack_trace_hadoop20.q ql/src/test/queries/clientnegative/mapreduce_stack_trace_hadoop20.q new file mode 100644 index 0000000..64edc16 --- /dev/null +++ ql/src/test/queries/clientnegative/mapreduce_stack_trace_hadoop20.q @@ -0,0 +1,7 @@ +set hive.exec.mode.local.auto=false; +set hive.exec.job.debug.capture.stacktraces=true; +set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.VerifySessionStateStackTracesHook; + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exist' AS (key, value); diff --git ql/src/test/queries/clientpositive/auto_join14.q ql/src/test/queries/clientpositive/auto_join14.q index 9552c86..fbae9da 100644 --- ql/src/test/queries/clientpositive/auto_join14.q +++ ql/src/test/queries/clientpositive/auto_join14.q @@ -1,9 +1,12 @@ set hive.auto.convert.join = true; +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; -set mapred.job.tracker=does.notexist.com:666; +set mapreduce.framework.name=yarn; +set mapred.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; explain diff --git ql/src/test/queries/clientpositive/auto_join14_hadoop20.q ql/src/test/queries/clientpositive/auto_join14_hadoop20.q new file mode 100644 index 0000000..e7ef6cf --- /dev/null +++ ql/src/test/queries/clientpositive/auto_join14_hadoop20.q @@ -0,0 +1,18 @@ + +set hive.auto.convert.join = true; + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; + +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto=true; + +explain +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value; + +FROM src JOIN srcpart 
ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value; + +SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1; diff --git ql/src/test/queries/clientpositive/combine2.q ql/src/test/queries/clientpositive/combine2.q index e2466d4..be94783 100644 --- ql/src/test/queries/clientpositive/combine2.q +++ ql/src/test/queries/clientpositive/combine2.q @@ -1,3 +1,5 @@ +USE default; + set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; set mapred.min.split.size=256; set mapred.min.split.size.per.node=256; @@ -8,8 +10,18 @@ set hive.exec.dynamic.partition.mode=nonstrict; set mapred.cache.shared.enabled=false; set hive.merge.smallfiles.avgsize=0; + + create table combine2(key string) partitioned by (value string); +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + insert overwrite table combine2 partition(value) select * from ( select key, value from src where key < 10 diff --git ql/src/test/queries/clientpositive/combine2_hadoop20.q ql/src/test/queries/clientpositive/combine2_hadoop20.q new file mode 100644 index 0000000..2cc05ea --- /dev/null +++ ql/src/test/queries/clientpositive/combine2_hadoop20.q @@ -0,0 +1,48 @@ +USE default; + +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +set mapred.min.split.size=256; +set mapred.min.split.size.per.node=256; +set mapred.min.split.size.per.rack=256; +set mapred.max.split.size=256; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set mapred.cache.shared.enabled=false; +set hive.merge.smallfiles.avgsize=0; + + + +create table combine2(key string) partitioned by (value string); + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. 
+ +insert overwrite table combine2 partition(value) +select * from ( + select key, value from src where key < 10 + union all + select key, '|' as value from src where key = 11 + union all + select key, '2010-04-21 09:45:00' value from src where key = 19) s; + +show partitions combine2; + +explain +select key, value from combine2 where value is not null order by key; + +select key, value from combine2 where value is not null order by key; + +explain extended +select count(1) from combine2 where value is not null; + +select count(1) from combine2 where value is not null; + +explain +select ds, count(1) from srcpart where ds is not null group by ds; + +select ds, count(1) from srcpart where ds is not null group by ds; diff --git ql/src/test/queries/clientpositive/ctas.q ql/src/test/queries/clientpositive/ctas.q index d44e3c0..569d460 100644 --- ql/src/test/queries/clientpositive/ctas.q +++ ql/src/test/queries/clientpositive/ctas.q @@ -1,10 +1,4 @@ - - - - - - - +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) create table nzhang_Tmp(a int, b string); select * from nzhang_Tmp; @@ -55,7 +49,8 @@ describe formatted nzhang_CTAS4; explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10; -set mapred.job.tracker=does.notexist.com:666; +set mapreduce.framework.name=yarn; +set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10; @@ -63,12 +58,3 @@ create table nzhang_ctas5 row format delimited fields terminated by ',' lines te create table nzhang_ctas6 (key string, `to` string); insert overwrite table nzhang_ctas6 select key, value from src limit 10; create table nzhang_ctas7 as select key, `to` from nzhang_ctas6; - - - - - - - - - diff --git ql/src/test/queries/clientpositive/ctas_hadoop20.q ql/src/test/queries/clientpositive/ctas_hadoop20.q new file mode 100644 index 0000000..6ae7ab9 --- /dev/null +++ ql/src/test/queries/clientpositive/ctas_hadoop20.q @@ -0,0 +1,59 @@ +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string); +select * from nzhang_Tmp; + +explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10; + +create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10; + +select * from nzhang_CTAS1; + +describe formatted nzhang_CTAS1; + + +explain create table nzhang_ctas2 as select * from src sort by key, value limit 10; + +create table nzhang_ctas2 as select * from src sort by key, value limit 10; + +select * from nzhang_ctas2; + +describe formatted nzhang_CTAS2; + + +explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10; + +create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10; + +select * from nzhang_ctas3; + +describe formatted nzhang_CTAS3; + + +explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2; + +create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2; + +select * from 
nzhang_ctas3; + +describe formatted nzhang_CTAS3; + + +explain create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10; + +create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10; + +select * from nzhang_ctas4; + +describe formatted nzhang_CTAS4; + +explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10; + +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto=true; + +create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10; + +create table nzhang_ctas6 (key string, `to` string); +insert overwrite table nzhang_ctas6 select key, value from src limit 10; +create table nzhang_ctas7 as select key, `to` from nzhang_ctas6; diff --git ql/src/test/queries/clientpositive/groupby7_noskew_multi_single_reducer.q ql/src/test/queries/clientpositive/groupby7_noskew_multi_single_reducer.q index bccdc76..27c3d67 100644 --- ql/src/test/queries/clientpositive/groupby7_noskew_multi_single_reducer.q +++ ql/src/test/queries/clientpositive/groupby7_noskew_multi_single_reducer.q @@ -6,7 +6,7 @@ CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; SET hive.exec.compress.intermediate=true; -SET hive.exec.compress.output=true; +SET hive.exec.compress.output=true; EXPLAIN FROM SRC @@ -17,5 +17,5 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key limit 10 INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key limit 10; -SELECT DEST1.* FROM DEST1; -SELECT DEST2.* FROM DEST2; +SELECT DEST1.* FROM DEST1 ORDER BY key ASC, value ASC; +SELECT DEST2.* FROM DEST2 ORDER BY key ASC, value ASC; diff --git ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q index 4c372d2..22c5a38 100644 --- ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q +++ ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q @@ -12,6 +12,6 @@ FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10; -SELECT DEST1.* FROM DEST1; -SELECT DEST2.* FROM DEST2; +SELECT DEST1.* FROM DEST1 ORDER BY key[0] ASC, value ASC; +SELECT DEST2.* FROM DEST2 ORDER BY 1 ASC, value ASC; diff --git ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q index 1cdb254..145aab8 100644 --- ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q +++ ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q @@ -17,9 +17,9 @@ INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE 
TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1); -SELECT * FROM dest_g2; -SELECT * FROM dest_g3; -SELECT * FROM dest_g4; +SELECT * FROM dest_g2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; +SELECT * FROM dest_g3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; +SELECT * FROM dest_g4 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; EXPLAIN FROM src @@ -36,11 +36,11 @@ INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1); -SELECT * FROM dest_g2; -SELECT * FROM dest_g3; -SELECT * FROM dest_g4; -SELECT * FROM dest_h2; -SELECT * FROM dest_h3; +SELECT * FROM dest_g2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; +SELECT * FROM dest_g3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; +SELECT * FROM dest_g4 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; +SELECT * FROM dest_h2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; +SELECT * FROM dest_h3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; DROP TABLE dest_g2; DROP TABLE dest_g3; diff --git ql/src/test/queries/clientpositive/input12.q ql/src/test/queries/clientpositive/input12.q index 083a445..72817d5 100644 --- ql/src/test/queries/clientpositive/input12.q +++ ql/src/test/queries/clientpositive/input12.q @@ -1,6 +1,9 @@ -set mapred.job.tracker=does.notexist.com:666; +set mapreduce.framework.name=yarn; +set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE; diff --git ql/src/test/queries/clientpositive/input12_hadoop20.q ql/src/test/queries/clientpositive/input12_hadoop20.q new file mode 100644 index 0000000..86751af --- /dev/null +++ ql/src/test/queries/clientpositive/input12_hadoop20.q @@ -0,0 +1,23 @@ +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto=true; + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200; + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200; + +SELECT dest1.* FROM dest1; +SELECT dest2.* FROM dest2; +SELECT dest3.* 
FROM dest3; diff --git ql/src/test/queries/clientpositive/input39.q ql/src/test/queries/clientpositive/input39.q index 7d457e2..f24f7d4 100644 --- ql/src/test/queries/clientpositive/input39.q +++ ql/src/test/queries/clientpositive/input39.q @@ -1,4 +1,4 @@ - +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) create table t1(key string, value string) partitioned by (ds string); @@ -15,7 +15,8 @@ select key, value from src; set hive.test.mode=true; set hive.mapred.mode=strict; -set mapred.job.tracker=does.notexist.com:666; +set mapreduce.framework.name=yarn; +set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; explain @@ -24,7 +25,5 @@ select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1'; select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1'; set hive.test.mode=false; -set mapred.job.tracker; - - - +set mapreduce.framework.name; +set mapreduce.jobtracker.address; diff --git ql/src/test/queries/clientpositive/input39_hadoop20.q ql/src/test/queries/clientpositive/input39_hadoop20.q new file mode 100644 index 0000000..3c5f2c5 --- /dev/null +++ ql/src/test/queries/clientpositive/input39_hadoop20.q @@ -0,0 +1,30 @@ +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string); +create table t2(key string, value string) partitioned by (ds string); + +insert overwrite table t1 partition (ds='1') +select key, value from src; + +insert overwrite table t1 partition (ds='2') +select key, value from src; + +insert overwrite table t2 partition (ds='1') +select key, value from src; + +set hive.test.mode=true; +set hive.mapred.mode=strict; +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto=true; + +explain +select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1'; + +select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1'; + +set hive.test.mode=false; +set mapred.job.tracker; + + + diff --git ql/src/test/queries/clientpositive/join14.q ql/src/test/queries/clientpositive/join14.q index b4e3d81..b930b29 100644 --- ql/src/test/queries/clientpositive/join14.q +++ ql/src/test/queries/clientpositive/join14.q @@ -1,7 +1,11 @@ +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; -set mapred.job.tracker=does.notexist.com:666; +set mapreduce.framework.name=yarn; +set mapreduce.jobtracker.address=localhost:58; set hive.exec.mode.local.auto=true; +set hive.exec.mode.local.auto.input.files.max=6; EXPLAIN FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 diff --git ql/src/test/queries/clientpositive/join14_hadoop20.q ql/src/test/queries/clientpositive/join14_hadoop20.q new file mode 100644 index 0000000..0b98138 --- /dev/null +++ ql/src/test/queries/clientpositive/join14_hadoop20.q @@ -0,0 +1,15 @@ +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE; + +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto=true; + +EXPLAIN +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value; + +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value; + +select dest1.* from dest1; diff --git ql/src/test/queries/clientpositive/leftsemijoin.q ql/src/test/queries/clientpositive/leftsemijoin.q index 
853267d..abe3d33 100644 --- ql/src/test/queries/clientpositive/leftsemijoin.q +++ ql/src/test/queries/clientpositive/leftsemijoin.q @@ -13,11 +13,11 @@ load data local inpath '../data/files/sales.txt' INTO TABLE sales; load data local inpath '../data/files/things.txt' INTO TABLE things partition(ds='2011-10-23'); load data local inpath '../data/files/things2.txt' INTO TABLE things partition(ds='2011-10-24'); -SELECT name,id FROM sales; +SELECT name,id FROM sales ORDER BY name ASC, id ASC; -SELECT id,name FROM things; +SELECT id,name FROM things ORDER BY id ASC, name ASC; -SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id); +SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) ORDER BY name ASC, id ASC; drop table sales; drop table things; diff --git ql/src/test/queries/clientpositive/query_properties.q ql/src/test/queries/clientpositive/query_properties.q index 173c850..434ebd4 100644 --- ql/src/test/queries/clientpositive/query_properties.q +++ ql/src/test/queries/clientpositive/query_properties.q @@ -1,14 +1,14 @@ set hive.exec.post.hooks = org.apache.hadoop.hive.ql.hooks.CheckQueryPropertiesHook; -select * from src a join src b on a.key = b.key limit 1; -select * from src group by src.key, src.value limit 1; -select * from src order by src.key limit 1; -select * from src sort by src.key limit 1; -select a.key, sum(b.value) from src a join src b on a.key = b.key group by a.key limit 1; -select transform(*) using 'cat' from src limit 1; -select * from src distribute by src.key limit 1; -select * from src cluster by src.key limit 1; +select * from src a join src b on a.key = b.key limit 0; +select * from src group by src.key, src.value limit 0; +select * from src order by src.key limit 0; +select * from src sort by src.key limit 0; +select a.key, sum(b.value) from src a join src b on a.key = b.key group by a.key limit 0; +select transform(*) using 'cat' from src limit 0; +select * from src distribute by src.key limit 0; +select * from src cluster by src.key limit 0; -select key, sum(value) from (select a.key as key, b.value as value from src a join src b on a.key = b.key) c group by key limit 1; -select * from src a join src b on a.key = b.key order by a.key limit 1; -select * from src a join src b on a.key = b.key distribute by a.key sort by a.key, b.value limit 1; \ No newline at end of file +select key, sum(value) from (select a.key as key, b.value as value from src a join src b on a.key = b.key) c group by key limit 0; +select * from src a join src b on a.key = b.key order by a.key limit 0; +select * from src a join src b on a.key = b.key distribute by a.key sort by a.key, b.value limit 0; diff --git ql/src/test/queries/clientpositive/repair.q ql/src/test/queries/clientpositive/repair.q index 5da6a98..7f9b1b7 100644 --- ql/src/test/queries/clientpositive/repair.q +++ ql/src/test/queries/clientpositive/repair.q @@ -1,11 +1,11 @@ - +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING); MSCK TABLE repairtable; -dfs -mkdir ../build/ql/test/data/warehouse/repairtable/p1=a/p2=a; -dfs -mkdir ../build/ql/test/data/warehouse/repairtable/p1=b/p2=a; +dfs -mkdir -p ../build/ql/test/data/warehouse/repairtable/p1=a/p2=a; +dfs -mkdir -p ../build/ql/test/data/warehouse/repairtable/p1=b/p2=a; MSCK TABLE repairtable; diff --git ql/src/test/queries/clientpositive/repair_hadoop20.q ql/src/test/queries/clientpositive/repair_hadoop20.q new file mode 100644 index 0000000..7fb8708 --- /dev/null +++ 
ql/src/test/queries/clientpositive/repair_hadoop20.q @@ -0,0 +1,16 @@ +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING); + +MSCK TABLE repairtable; + +dfs -mkdir ../build/ql/test/data/warehouse/repairtable/p1=a/p2=a; +dfs -mkdir ../build/ql/test/data/warehouse/repairtable/p1=b/p2=a; + +MSCK TABLE repairtable; + +MSCK REPAIR TABLE repairtable; + +MSCK TABLE repairtable; + + diff --git ql/src/test/queries/clientpositive/sample_islocalmode_hook.q ql/src/test/queries/clientpositive/sample_islocalmode_hook.q index 871ae05..f4f3d91 100644 --- ql/src/test/queries/clientpositive/sample_islocalmode_hook.q +++ ql/src/test/queries/clientpositive/sample_islocalmode_hook.q @@ -10,6 +10,8 @@ set mapred.min.split.size.per.rack=300; set hive.exec.mode.local.auto=true; set hive.merge.smallfiles.avgsize=1; +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + -- create file inputs create table sih_i_part (key int, value string) partitioned by (p string); insert overwrite table sih_i_part partition (p='1') select key, value from src; @@ -18,21 +20,24 @@ insert overwrite table sih_i_part partition (p='3') select key+20000, value from create table sih_src as select key, value from sih_i_part order by key, value; create table sih_src2 as select key, value from sih_src order by key, value; -set hive.exec.post.hooks = org.apache.hadoop.hive.ql.hooks.VerifyIsLocalModeHook ; -set mapred.job.tracker=does.notexist.com:666; -set hive.exec.mode.local.auto.input.files.max=1; +set hive.exec.post.hooks = org.apache.hadoop.hive.ql.hooks.VerifyIsLocalModeHook; +set mapreduce.framework.name=yarn; +set mapreduce.jobtracker.address=localhost:58; +set hive.sample.seednumber=7; + +-- Relaxing hive.exec.mode.local.auto.input.files.max=1. +-- Hadoop20 will not generate more splits than there are files (one). +-- Hadoop23 generate splits correctly (four), hence the max needs to be adjusted to ensure running in local mode. +-- Default value is hive.exec.mode.local.auto.input.files.max=4 which produces expected behavior on Hadoop23. +-- hive.sample.seednumber is required because Hadoop23 generates multiple splits and tablesample is non-repeatable without it. 
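The comment block just above tunes Hive's automatic local-mode decision. As a rough, hedged sketch (not Hive's actual implementation; the property names and the files.max default of 4 come from the qfile comments, and the 128 MB bytes default is assumed), the decision amounts to staying under both auto thresholds:

import org.apache.hadoop.conf.Configuration;

public class LocalModeSketch {
  // Hedged approximation of the auto-local-mode check the qfile settings are adjusting.
  static boolean runsLocally(Configuration conf, long totalInputBytes, int totalInputFiles) {
    long maxBytes = conf.getLong("hive.exec.mode.local.auto.inputbytes.max", 134217728L); // assumed 128 MB default
    int maxFiles = conf.getInt("hive.exec.mode.local.auto.input.files.max", 4);
    return totalInputBytes <= maxBytes && totalInputFiles <= maxFiles;
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // With four splits on Hadoop 0.23, the default files.max=4 keeps the sampled query local;
    // pinning files.max=1 (as the old qfile did) would push it off local mode.
    System.out.println(runsLocally(conf, 900L, 4)); // true under the defaults above
  }
}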
-- sample split, running locally limited by num tasks select count(1) from sih_src tablesample(1 percent); -set mapred.job.tracker=does.notexist.com:666; - -- sample two tables -select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key; +select count(1) from sih_src tablesample(1 percent) a join sih_src2 tablesample(1 percent) b on a.key = b.key; set hive.exec.mode.local.auto.inputbytes.max=1000; -set hive.exec.mode.local.auto.input.files.max=4; -set mapred.job.tracker=does.notexist.com:666; -- sample split, running locally limited by max bytes select count(1) from sih_src tablesample(1 percent); diff --git ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q new file mode 100644 index 0000000..630dd60 --- /dev/null +++ ql/src/test/queries/clientpositive/sample_islocalmode_hook_hadoop20.q @@ -0,0 +1,47 @@ +drop table if exists sih_i_part; +drop table if exists sih_src; +drop table if exists sih_src2; + +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +set mapred.max.split.size=300; +set mapred.min.split.size=300; +set mapred.min.split.size.per.node=300; +set mapred.min.split.size.per.rack=300; +set hive.exec.mode.local.auto=true; +set hive.merge.smallfiles.avgsize=1; + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +-- create file inputs +create table sih_i_part (key int, value string) partitioned by (p string); +insert overwrite table sih_i_part partition (p='1') select key, value from src; +insert overwrite table sih_i_part partition (p='2') select key+10000, value from src; +insert overwrite table sih_i_part partition (p='3') select key+20000, value from src; +create table sih_src as select key, value from sih_i_part order by key, value; +create table sih_src2 as select key, value from sih_src order by key, value; + +set hive.exec.post.hooks = org.apache.hadoop.hive.ql.hooks.VerifyIsLocalModeHook ; +set mapred.job.tracker=localhost:58; +set hive.exec.mode.local.auto.input.files.max=1; +set hive.sample.seednumber=7; + +-- sample split, running locally limited by num tasks +select count(1) from sih_src tablesample(1 percent); + +set mapred.job.tracker=localhost:58; +set hive.sample.seednumber=7; + +-- sample two tables +select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key; + +set hive.exec.mode.local.auto.inputbytes.max=1000; +set hive.exec.mode.local.auto.input.files.max=4; +set mapred.job.tracker=localhost:58; +set hive.sample.seednumber=7; + +-- sample split, running locally limited by max bytes +select count(1) from sih_src tablesample(1 percent); + +drop table sih_i_part; +drop table sih_src; +drop table sih_src2; diff --git ql/src/test/queries/clientpositive/split_sample.q ql/src/test/queries/clientpositive/split_sample.q index 1e913d9..3d1e417 100644 --- ql/src/test/queries/clientpositive/split_sample.q +++ ql/src/test/queries/clientpositive/split_sample.q @@ -12,6 +12,15 @@ set mapred.min.split.size=300; set mapred.min.split.size.per.node=300; set mapred.min.split.size.per.rack=300; set hive.merge.smallfiles.avgsize=1; +set hive.sample.seednumber=7; + +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. 
+-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. -- create multiple file inputs (two enable multiple splits) create table ss_i_part (key int, value string) partitioned by (p string); @@ -40,6 +49,7 @@ set hive.sample.seednumber=5; create table ss_t5 as select sum(key) % 397 as s from ss_src3 tablesample(1 percent) limit 10; select sum(s) from (select s from ss_t3 union all select s from ss_t4 union all select s from ss_t5) t; +set hive.sample.seednumber=7; -- sample more than one split explain select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; select count(distinct key) from ss_src2 tablesample(70 percent) limit 10; diff --git ql/src/test/resources/core-site.xml ql/src/test/resources/core-site.xml new file mode 100644 index 0000000..dde8ff9 --- /dev/null +++ ql/src/test/resources/core-site.xml @@ -0,0 +1,27 @@ + + + + + + + + + yarn.app.mapreduce.am.job.node-blacklisting.enable + false + + diff --git ql/src/test/results/clientnegative/autolocal1.q.out ql/src/test/results/clientnegative/autolocal1.q.out index 6af2b4e..4f12bf8 100644 --- ql/src/test/results/clientnegative/autolocal1.q.out +++ ql/src/test/results/clientnegative/autolocal1.q.out @@ -1,4 +1,14 @@ -PREHOOK: query: SELECT key FROM src +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- hadoop0.23 changes the behavior of JobClient initialization +-- in hadoop0.20, JobClient initialization tries to get JobTracker's address +-- this throws the expected IllegalArgumentException +-- in hadoop0.23, JobClient initialization only initializes cluster +-- and get user group information +-- not attempts to get JobTracker's address +-- no IllegalArgumentException thrown in JobClient Initialization +-- an exception is thrown when JobClient submitJob + +SELECT key FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientnegative/local_mapred_error_cache.q.out ql/src/test/results/clientnegative/local_mapred_error_cache.q.out index caec723..d249bbc 100644 --- ql/src/test/results/clientnegative/local_mapred_error_cache.q.out +++ ql/src/test/results/clientnegative/local_mapred_error_cache.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: FROM src SELECT TRANSFORM(key, value) USING 'python ../data/scripts/cat_error.py' AS (key, value) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +FROM src SELECT TRANSFORM(key, value) USING 'python ../data/scripts/cat_error.py' AS (key, value) PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientnegative/local_mapred_error_cache_hadoop20.q.out ql/src/test/results/clientnegative/local_mapred_error_cache_hadoop20.q.out new file mode 100644 index 0000000..fe68028 --- /dev/null +++ ql/src/test/results/clientnegative/local_mapred_error_cache_hadoop20.q.out @@ -0,0 +1,26 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +FROM src SELECT TRANSFORM(key, value) USING 'python ../data/scripts/cat_error.py' AS (key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +Execution failed with exit status: 2 +Obtaining error information + +Task failed! 
+Task ID: + Stage-1 + +Logs: + +#### A masked pattern was here #### +ID: Stage-1 +org.apache.hadoop.hive.ql.metadata.HiveException: Hit error while closing .. +#### A masked pattern was here #### +org.apache.hadoop.hive.ql.metadata.HiveException: Hit error while closing .. +#### A masked pattern was here #### +org.apache.hadoop.hive.ql.metadata.HiveException: Hit error while closing .. +#### A masked pattern was here #### +Ended Job = job_local_0001 with errors +Error during job, obtaining debugging information... +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask diff --git ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out index b199241..473c2db 100644 --- ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out +++ ql/src/test/results/clientnegative/mapreduce_stack_trace.q.out @@ -1,13 +1,7 @@ -PREHOOK: query: FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exist' AS (key, value) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exist' AS (key, value) PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"} -Hive Runtime Error while processing row {"key":"238","value":"val_238"} -FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"} -Hive Runtime Error while processing row {"key":"238","value":"val_238"} -FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"} -Hive Runtime Error while processing row {"key":"238","value":"val_238"} -FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"} -Hive Runtime Error while processing row {"key":"238","value":"val_238"} FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask diff --git ql/src/test/results/clientnegative/mapreduce_stack_trace_hadoop20.q.out ql/src/test/results/clientnegative/mapreduce_stack_trace_hadoop20.q.out new file mode 100644 index 0000000..465d867 --- /dev/null +++ ql/src/test/results/clientnegative/mapreduce_stack_trace_hadoop20.q.out @@ -0,0 +1,15 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +FROM src SELECT TRANSFORM(key, value) USING 'script_does_not_exist' AS (key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"} +Hive Runtime Error while processing row {"key":"238","value":"val_238"} +FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"} +Hive Runtime Error while processing row {"key":"238","value":"val_238"} +FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"} +Hive Runtime Error while processing row {"key":"238","value":"val_238"} +FATAL ExecMapper: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"key":"238","value":"val_238"} +Hive Runtime Error while processing 
row {"key":"238","value":"val_238"} +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask diff --git ql/src/test/results/clientpositive/auto_join14.q.out ql/src/test/results/clientpositive/auto_join14.q.out index 18a1b57..d79e00c 100644 --- ql/src/test/results/clientpositive/auto_join14.q.out +++ ql/src/test/results/clientpositive/auto_join14.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: explain diff --git ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out new file mode 100644 index 0000000..7814e0d --- /dev/null +++ ql/src/test/results/clientpositive/auto_join14_hadoop20.q.out @@ -0,0 +1,278 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: explain +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME srcpart)) (and (AND (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL srcpart) key)) (= (. (TOK_TABLE_OR_COL srcpart) ds) '2008-04-08')) (> (. (TOK_TABLE_OR_COL src) key) 100)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL srcpart) value))))) + +STAGE DEPENDENCIES: + Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 + Stage-7 has a backup stage: Stage-1 + Stage-4 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-2 depends on stages: Stage-0 + Stage-8 has a backup stage: Stage-1 + Stage-5 depends on stages: Stage-8 + Stage-1 + +STAGE PLANS: + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + srcpart + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + srcpart + TableScan + alias: srcpart + HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 0 + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + HashTable Sink Operator + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + Position of Big Table: 1 + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + srcpart + TableScan + alias: srcpart + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + 
expr: (key > 100.0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + srcpart + TableScan + alias: srcpart + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + +PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(dest1.c1,dest1.c2)) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +404554174174 diff --git ql/src/test/results/clientpositive/combine2.q.out ql/src/test/results/clientpositive/combine2.q.out index 6c56de5..9ec72b9 100644 --- ql/src/test/results/clientpositive/combine2.q.out +++ ql/src/test/results/clientpositive/combine2.q.out @@ -1,9 +1,21 @@ +PREHOOK: query: USE default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE PREHOOK: query: create table combine2(key string) partitioned by (value string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table combine2(key string) partitioned by (value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@combine2 -PREHOOK: query: insert overwrite table combine2 partition(value) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=256 and 
hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) select * from ( select key, value from src where key < 10 union all @@ -13,7 +25,15 @@ select * from ( PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@combine2 -POSTHOOK: query: insert overwrite table combine2 partition(value) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) select * from ( select key, value from src where key < 10 union all @@ -239,7 +259,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -265,7 +285,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 1 + numFiles 3 numPartitions 8 numRows 3 partition_columns value @@ -285,7 +305,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -331,7 +351,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -377,7 +397,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -403,7 +423,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 1 + numFiles 3 numPartitions 8 numRows 3 partition_columns value @@ -423,7 +443,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -469,7 +489,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -515,7 +535,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value @@ -561,7 +581,7 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.combine2 - numFiles 8 + numFiles 12 numPartitions 8 numRows 12 partition_columns value diff --git ql/src/test/results/clientpositive/combine2_hadoop20.q.out ql/src/test/results/clientpositive/combine2_hadoop20.q.out new file mode 100644 index 0000000..c73f2c5 --- /dev/null +++ ql/src/test/results/clientpositive/combine2_hadoop20.q.out @@ -0,0 +1,769 @@ +PREHOOK: query: USE 
default +PREHOOK: type: SWITCHDATABASE +POSTHOOK: query: USE default +POSTHOOK: type: SWITCHDATABASE +PREHOOK: query: create table combine2(key string) partitioned by (value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table combine2(key string) partitioned by (value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@combine2 +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) +select * from ( + select key, value from src where key < 10 + union all + select key, '|' as value from src where key = 11 + union all + select key, '2010-04-21 09:45:00' value from src where key = 19) s +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@combine2 +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=256 and hive.merge.smallfiles.avgsize=0 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results results of this test. +-- This issue was fixed in MAPREDUCE-2046 which is included in 0.22. + +insert overwrite table combine2 partition(value) +select * from ( + select key, value from src where key < 10 + union all + select key, '|' as value from src where key = 11 + union all + select key, '2010-04-21 09:45:00' value from src where key = 19) s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@combine2@value=2010-04-21 09%3A45%3A00 +POSTHOOK: Output: default@combine2@value=val_0 +POSTHOOK: Output: default@combine2@value=val_2 +POSTHOOK: Output: default@combine2@value=val_4 +POSTHOOK: Output: default@combine2@value=val_5 +POSTHOOK: Output: default@combine2@value=val_8 +POSTHOOK: Output: default@combine2@value=val_9 +POSTHOOK: Output: default@combine2@value=| +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: show partitions combine2 +PREHOOK: type: SHOWPARTITIONS +POSTHOOK: query: show partitions combine2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +value=2010-04-21 09%3A45%3A00 +value=val_0 +value=val_2 +value=val_4 +value=val_5 +value=val_8 +value=val_9 +value=| +PREHOOK: query: explain +select key, value from combine2 where value is not null order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value from combine2 where value is not null order by key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: combine2 
PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME combine2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + combine2 + TableScan + alias: combine2 + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select key, value from combine2 where value is not null order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 +PREHOOK: Input: default@combine2@value=val_0 +PREHOOK: Input: default@combine2@value=val_2 +PREHOOK: Input: default@combine2@value=val_4 +PREHOOK: Input: default@combine2@value=val_5 +PREHOOK: Input: 
default@combine2@value=val_8 +PREHOOK: Input: default@combine2@value=val_9 +PREHOOK: Input: default@combine2@value=| +#### A masked pattern was here #### +POSTHOOK: query: select key, value from combine2 where value is not null order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 +POSTHOOK: Input: default@combine2@value=val_0 +POSTHOOK: Input: default@combine2@value=val_2 +POSTHOOK: Input: default@combine2@value=val_4 +POSTHOOK: Input: default@combine2@value=val_5 +POSTHOOK: Input: default@combine2@value=val_8 +POSTHOOK: Input: default@combine2@value=val_9 +POSTHOOK: Input: default@combine2@value=| +#### A masked pattern was here #### +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +11 | +19 2010-04-21 09:45:00 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: explain extended +select count(1) from combine2 where value is not null +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select count(1) from combine2 where value is not null +POSTHOOK: type: QUERY +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), 
(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME combine2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + combine2 + TableScan + alias: combine2 + GatherStats: false + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: value=2010-04-21 09%3A45%3A00 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value 2010-04-21 09:45:00 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 2 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_0 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_0 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 3 + partition_columns value + rawDataSize 3 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 6 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_2 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 1 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_4 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 1 + serialization.ddl struct combine2 { string key} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_5 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_5 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 3 + partition_columns value + rawDataSize 3 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 6 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_8 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_8 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 1 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=val_9 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value val_9 + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 1 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 +#### A masked pattern was here #### + Partition + base file name: value=| + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + value | + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 1 + numPartitions 8 + numRows 1 + partition_columns value + rawDataSize 2 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key + columns.types string +#### A masked pattern was here #### + name default.combine2 + numFiles 8 + numPartitions 8 + numRows 12 + partition_columns value + rawDataSize 14 + serialization.ddl struct combine2 { string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 26 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.combine2 + name: default.combine2 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + serialization.format 1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + + 
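For context on the golden-file changes above: the comments echoed in combine2.q.out and combine2_hadoop20.q.out describe a qfile preamble that pins the split settings before the INSERT runs. The combine2.q source itself is not part of these hunks, so the lines below are only a minimal sketch of what that preamble presumably contains; the two set commands, the version directive, and the INSERT are taken from the echoed comments and query text, while the hive.input.format line is an assumption marked as such.

    -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20)   -- combine2.q; the forked combine2_hadoop20.q uses the INCLUDE form instead
    set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;  -- assumed: the comments only say CombineFileInputFormat is in play
    set mapred.max.split.size=256;
    set hive.merge.smallfiles.avgsize=0;

    insert overwrite table combine2 partition(value)
    select * from (
      select key, value from src where key < 10
      union all
      select key, '|' as value from src where key = 11
      union all
      select key, '2010-04-21 09:45:00' value from src where key = 19) s;

Read this way, the numFiles edits earlier in the patch (8 -> 12 for the table-level properties, 1 -> 3 for the three-row partitions) are simply the extra per-split output files produced once MAPREDUCE-2046 lets Hadoop 0.23 create splits smaller than a block, while the new combine2_hadoop20.q.out preserves the single-split 0.20 baseline.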
+PREHOOK: query: select count(1) from combine2 where value is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 +PREHOOK: Input: default@combine2@value=val_0 +PREHOOK: Input: default@combine2@value=val_2 +PREHOOK: Input: default@combine2@value=val_4 +PREHOOK: Input: default@combine2@value=val_5 +PREHOOK: Input: default@combine2@value=val_8 +PREHOOK: Input: default@combine2@value=val_9 +PREHOOK: Input: default@combine2@value=| +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from combine2 where value is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@combine2@value=2010-04-21 09%3A45%3A00 +POSTHOOK: Input: default@combine2@value=val_0 +POSTHOOK: Input: default@combine2@value=val_2 +POSTHOOK: Input: default@combine2@value=val_4 +POSTHOOK: Input: default@combine2@value=val_5 +POSTHOOK: Input: default@combine2@value=val_8 +POSTHOOK: Input: default@combine2@value=val_9 +POSTHOOK: Input: default@combine2@value=| +#### A masked pattern was here #### +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +12 +PREHOOK: query: explain +select ds, count(1) from srcpart where ds is not null group by ds +PREHOOK: type: QUERY +POSTHOOK: query: explain +select ds, count(1) from srcpart where ds is not null group by ds +POSTHOOK: type: QUERY +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), 
(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + srcpart + TableScan + alias: srcpart + Select Operator + expressions: + expr: ds + type: string + outputColumnNames: ds + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: ds + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select ds, count(1) from srcpart where ds is not null group by ds +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select ds, count(1) from srcpart where ds is not null group by ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: Lineage: combine2 PARTITION(value=2010-04-21 09:45:00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_0).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_4).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_8).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=val_9).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine2 PARTITION(value=|).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] +2008-04-08 1000 +2008-04-09 1000 diff --git ql/src/test/results/clientpositive/ctas.q.out ql/src/test/results/clientpositive/ctas.q.out index e11186b..d4c12b5 100644 --- ql/src/test/results/clientpositive/ctas.q.out +++ ql/src/test/results/clientpositive/ctas.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: create table nzhang_Tmp(a int, b string) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table nzhang_Tmp(a int, b string) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@nzhang_Tmp PREHOOK: query: select * from nzhang_Tmp diff --git ql/src/test/results/clientpositive/ctas_hadoop20.q.out ql/src/test/results/clientpositive/ctas_hadoop20.q.out new file mode 100644 index 0000000..67c5242 --- /dev/null +++ 
ql/src/test/results/clientpositive/ctas_hadoop20.q.out @@ -0,0 +1,938 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +create table nzhang_Tmp(a int, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@nzhang_Tmp +PREHOOK: query: select * from nzhang_Tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_tmp +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_Tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_tmp +#### A masked pattern was here #### +PREHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_CTAS1) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL k)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + name: default.nzhang_CTAS1 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: k string, value string + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: nzhang_CTAS1 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 +POSTHOOK: type: 
CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_CTAS1 +PREHOOK: query: select * from nzhang_CTAS1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas1 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_CTAS1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: describe formatted nzhang_CTAS1 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted nzhang_CTAS1 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +k string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 96 + totalSize 106 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table nzhang_ctas2 as select * from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas2 as select * from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas2) TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + name: default.nzhang_ctas2 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A 
masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: nzhang_ctas2 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas2 +PREHOOK: query: select * from nzhang_ctas2 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas2 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: describe formatted nzhang_CTAS2 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted nzhang_CTAS2 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 96 + totalSize 106 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas3) TOK_LIKETABLE (TOK_TABLESERIALIZER (TOK_SERDENAME "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) TOK_TBLRCFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (/ (TOK_TABLE_OR_COL key) 2) half_key) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_TABLE_OR_COL value) "_con") conb)) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL half_key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL conb))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: (key / 2) + type: double + expr: concat(value, '_con') + type: string + 
outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: double + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: double + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + name: default.nzhang_ctas3 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: half_key double, conb string + if not exists: false + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde name: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: nzhang_ctas3 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas3 +PREHOOK: query: select * from nzhang_ctas3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +0.0 val_0_con +0.0 val_0_con +0.0 val_0_con +1.0 val_2_con +2.0 val_4_con +2.5 val_5_con +2.5 val_5_con +2.5 val_5_con +4.0 val_8_con +4.5 val_9_con +PREHOOK: query: describe formatted nzhang_CTAS3 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted nzhang_CTAS3 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +half_key double None +conb string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 120 + totalSize 294 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: 
explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +POSTHOOK: type: CREATETABLE +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas3) TOK_IFNOTEXISTS TOK_LIKETABLE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 2)))) + +STAGE DEPENDENCIES: + +STAGE PLANS: +STAGE PLANS: +PREHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table if not exists nzhang_ctas3 as select key, value from src sort by key, value limit 2 +POSTHOOK: type: CREATETABLE +PREHOOK: query: select * from nzhang_ctas3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas3 +#### A masked pattern was here #### +0.0 val_0_con +0.0 val_0_con +0.0 val_0_con +1.0 val_2_con +2.0 val_4_con +2.5 val_5_con +2.5 val_5_con +2.5 val_5_con +4.0 val_8_con +4.5 val_9_con +PREHOOK: query: describe formatted nzhang_CTAS3 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted nzhang_CTAS3 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +half_key double None +conb string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 120 + totalSize 294 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe +InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: explain create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas4) TOK_LIKETABLE (TOK_TABLEROWFORMAT (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD ','))) TOK_TBLTEXTFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + 
TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + name: default.nzhang_ctas4 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + field delimiter: , + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: nzhang_ctas4 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas4 +PREHOOK: query: select * from nzhang_ctas4 +PREHOOK: type: QUERY +PREHOOK: Input: default@nzhang_ctas4 +#### A masked pattern was here #### +POSTHOOK: query: select * from nzhang_ctas4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nzhang_ctas4 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +PREHOOK: query: describe formatted nzhang_CTAS4 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe formatted nzhang_CTAS4 +POSTHOOK: type: DESCTABLE +# col_name data_type comment + +key string None +value string None + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + numFiles 1 + numPartitions 0 + numRows 10 + rawDataSize 96 + totalSize 106 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + field.delim , + serialization.format , +PREHOOK: query: explain extended create table nzhang_ctas5 row format delimited fields 
terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain extended create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE (TOK_TABNAME nzhang_ctas5) TOK_LIKETABLE (TOK_TABLEROWFORMAT (TOK_SERDEPROPS (TOK_TABLEROWFORMATFIELD ',') (TOK_TABLEROWFORMATLINES '\012'))) TOK_TBLTEXTFILE (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 10)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string +#### A masked pattern was here #### + name default.src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,string + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + tag: -1 + value expressions: + 
expr: _col0 + type: string + expr: _col1 + type: string + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,string + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,string + escape.delim \ + Reduce Operator Tree: + Extract + Limit + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + field.delim , + line.delim + + name default.nzhang_ctas5 + serialization.format , + name: default.nzhang_ctas5 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-4 + Create Table Operator: + Create Table + columns: key string, value string + field delimiter: , + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + line delimiter: + + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: nzhang_ctas5 + isExternal: false + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + + +PREHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +POSTHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas5 +PREHOOK: query: create table nzhang_ctas6 (key string, `to` string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table nzhang_ctas6 (key string, `to` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@nzhang_ctas6 +PREHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@nzhang_ctas6 +POSTHOOK: query: insert overwrite table nzhang_ctas6 select key, value from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@nzhang_ctas6 +POSTHOOK: Lineage: nzhang_ctas6.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_ctas6.to SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table nzhang_ctas7 as select key, `to` from nzhang_ctas6 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@nzhang_ctas6 +POSTHOOK: query: create table nzhang_ctas7 as select key, `to` from nzhang_ctas6 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@nzhang_ctas6 +POSTHOOK: Output: default@nzhang_ctas7 +POSTHOOK: Lineage: 
nzhang_ctas6.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: nzhang_ctas6.to SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out index f25a1c2..461f8d6 100644 --- ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out @@ -212,11 +212,11 @@ POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: query: SELECT DEST1.* FROM DEST1 ORDER BY key ASC, value ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### -POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 ORDER BY key ASC, value ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### @@ -226,19 +226,19 @@ POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] 0 0.0 10 10.0 +11 11.0 100 200.0 103 206.0 104 208.0 105 105.0 -11 11.0 111 111.0 113 226.0 114 114.0 -PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: query: SELECT DEST2.* FROM DEST2 ORDER BY key ASC, value ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest2 #### A masked pattern was here #### -POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 ORDER BY key ASC, value ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### @@ -248,11 +248,11 @@ POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] 0 0.0 10 10.0 +11 11.0 100 200.0 103 206.0 104 208.0 105 105.0 -11 11.0 111 111.0 113 226.0 114 114.0 diff --git ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out index c7d77ab..9e2979a 100644 --- ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out @@ -245,11 +245,11 @@ POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.null, ] -PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: query: SELECT DEST1.* FROM DEST1 ORDER BY key[0] ASC, value ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### -POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 ORDER BY key[0] ASC, value ASC POSTHOOK: type: QUERY 
POSTHOOK: Input: default@dest1 #### A masked pattern was here #### @@ -267,11 +267,11 @@ POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.null, ] ["111"] 1 ["113"] 2 ["114"] 1 -PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: query: SELECT DEST2.* FROM DEST2 ORDER BY 1 ASC, value ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest2 #### A masked pattern was here #### -POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 ORDER BY 1 ASC, value ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### @@ -279,13 +279,13 @@ POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:str POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.null, ] -{"0":"val_0"} 3 {"10":"val_10"} 1 -{"100":"val_100"} 2 -{"103":"val_103"} 2 -{"104":"val_104"} 2 {"105":"val_105"} 1 {"11":"val_11"} 1 {"111":"val_111"} 1 -{"113":"val_113"} 2 {"114":"val_114"} 1 +{"100":"val_100"} 2 +{"103":"val_103"} 2 +{"104":"val_104"} 2 +{"113":"val_113"} 2 +{"0":"val_0"} 3 diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out index 37aa6dc..7830697 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out @@ -297,11 +297,11 @@ POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type: POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: SELECT * FROM dest_g2 +PREHOOK: query: SELECT * FROM dest_g2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest_g2 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM dest_g2 +POSTHOOK: query: SELECT * FROM dest_g2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g2 #### A masked pattern was here #### @@ -325,11 +325,11 @@ POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:str 7 6 7735.0 447 10 8 8 8762.0 595 10 9 7 91047.0 577 12 -PREHOOK: query: SELECT * FROM dest_g3 +PREHOOK: query: SELECT * FROM dest_g3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest_g3 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM dest_g3 +POSTHOOK: query: SELECT * FROM dest_g3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g3 #### A masked pattern was here #### @@ -353,11 +353,11 @@ POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:str 2 69 225571.0 15780 111 3 62 332004.0 20119 99 4 74 452763.0 30965 124 -PREHOOK: query: SELECT * FROM dest_g4 +PREHOOK: query: SELECT * FROM dest_g4 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest_g4 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM dest_g4 +POSTHOOK: query: SELECT * FROM dest_g4 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g4 #### A masked pattern 
was here #### @@ -909,11 +909,11 @@ POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:st POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -PREHOOK: query: SELECT * FROM dest_g2 +PREHOOK: query: SELECT * FROM dest_g2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest_g2 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM dest_g2 +POSTHOOK: query: SELECT * FROM dest_g2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g2 #### A masked pattern was here #### @@ -962,11 +962,11 @@ POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:s 7 6 7735.0 447 10 8 8 8762.0 595 10 9 7 91047.0 577 12 -PREHOOK: query: SELECT * FROM dest_g3 +PREHOOK: query: SELECT * FROM dest_g3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest_g3 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM dest_g3 +POSTHOOK: query: SELECT * FROM dest_g3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g3 #### A masked pattern was here #### @@ -1015,11 +1015,11 @@ POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:s 2 69 225571.0 15780 111 3 62 332004.0 20119 99 4 74 452763.0 30965 124 -PREHOOK: query: SELECT * FROM dest_g4 +PREHOOK: query: SELECT * FROM dest_g4 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest_g4 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM dest_g4 +POSTHOOK: query: SELECT * FROM dest_g4 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_g4 #### A masked pattern was here #### @@ -1073,11 +1073,11 @@ POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:s 7 6 7735.0 447 10 8 8 8762.0 595 10 9 7 91047.0 577 12 -PREHOOK: query: SELECT * FROM dest_h2 +PREHOOK: query: SELECT * FROM dest_h2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest_h2 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM dest_h2 +POSTHOOK: query: SELECT * FROM dest_h2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_h2 #### A masked pattern was here #### @@ -1122,20 +1122,20 @@ POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type: POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 0 1 00.0 0 3 +1 4 1878.0 878 6 1 5 1729.0 729 8 -1 7 11171.0 1171 11 1 6 11282.0 1282 12 1 6 11494.0 1494 11 -1 4 1878.0 878 6 +1 7 11171.0 1171 11 +1 7 11516.0 1516 10 1 8 11263.0 1263 10 -1 9 12654.0 2654 16 1 9 12294.0 2294 14 -1 7 11516.0 1516 10 -PREHOOK: query: SELECT * FROM dest_h3 +1 9 12654.0 2654 16 +PREHOOK: query: SELECT * FROM dest_h3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC PREHOOK: type: QUERY PREHOOK: Input: default@dest_h3 #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM dest_h3 +POSTHOOK: query: SELECT * 
FROM dest_h3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_h3 #### A masked pattern was here #### @@ -1179,38 +1179,38 @@ POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:st POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -5 1 515.0 15 3 5 1 5102.0 102 2 +5 1 5116.0 116 2 +5 1 515.0 15 3 5 1 553.0 53 1 5 1 554.0 54 1 5 1 557.0 57 1 -5 1 5116.0 116 2 +6 1 6134.0 134 2 6 1 664.0 64 1 6 1 665.0 65 1 6 1 666.0 66 1 -6 1 6134.0 134 2 6 1 669.0 69 1 -7 1 7210.0 210 3 7 1 7144.0 144 2 -7 1 774.0 74 1 7 1 7152.0 152 2 +7 1 7210.0 210 3 +7 1 774.0 74 1 7 1 777.0 77 1 7 1 778.0 78 1 +8 1 8166.0 166 2 +8 1 8168.0 168 2 8 1 88.0 8 1 8 1 880.0 80 1 8 1 882.0 82 1 -8 1 8166.0 166 2 -8 1 8168.0 168 2 8 1 885.0 85 1 8 1 886.0 86 1 8 1 887.0 87 1 -9 1 99.0 9 1 -9 1 9270.0 270 3 -9 1 992.0 92 1 9 1 9190.0 190 2 -9 1 996.0 96 1 9 1 9194.0 194 2 9 1 9196.0 196 2 +9 1 9270.0 270 3 +9 1 99.0 9 1 +9 1 992.0 92 1 +9 1 996.0 96 1 PREHOOK: query: DROP TABLE dest_g2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@dest_g2 diff --git ql/src/test/results/clientpositive/input12.q.out ql/src/test/results/clientpositive/input12.q.out index 2929907..96a3dd5 100644 --- ql/src/test/results/clientpositive/input12.q.out +++ ql/src/test/results/clientpositive/input12.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE diff --git ql/src/test/results/clientpositive/input12_hadoop20.q.out ql/src/test/results/clientpositive/input12_hadoop20.q.out new file mode 100644 index 0000000..518ca44 --- /dev/null +++ ql/src/test/results/clientpositive/input12_hadoop20.q.out @@ -0,0 +1,805 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest2 +PREHOOK: query: CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest3(key INT) PARTITIONED BY(ds STRING, hr STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest3 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 
PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL src) key) 100))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value))) (TOK_WHERE (and (>= (. (TOK_TABLE_OR_COL src) key) 100) (< (. (TOK_TABLE_OR_COL src) key) 200)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest3) (TOK_PARTSPEC (TOK_PARTVAL ds '2008-04-08') (TOK_PARTVAL hr '12')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key))) (TOK_WHERE (>= (. (TOK_TABLE_OR_COL src) key) 200)))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-7 depends on stages: Stage-3 , consists of Stage-6, Stage-5 + Stage-6 + Stage-0 depends on stages: Stage-6, Stage-5 + Stage-4 depends on stages: Stage-0 + Stage-5 + Stage-11 depends on stages: Stage-3 , consists of Stage-10, Stage-9 + Stage-10 + Stage-1 depends on stages: Stage-10, Stage-9 + Stage-8 depends on stages: Stage-1 + Stage-9 + Stage-15 depends on stages: Stage-3 , consists of Stage-14, Stage-13 + Stage-14 + Stage-2 depends on stages: Stage-14, Stage-13 + Stage-12 depends on stages: Stage-2 + Stage-13 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key < 100.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Filter Operator + predicate: + expr: ((key >= 100.0) and (key < 200.0)) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Filter Operator + predicate: + expr: (key >= 200.0) + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 3 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-11 + Conditional Operator + + Stage: Stage-10 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-8 + Stats-Aggr Operator + + Stage: Stage-9 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-15 + Conditional Operator + + Stage: Stage-14 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-2 + Move Operator + tables: + partition: + ds 2008-04-08 + hr 12 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 + + Stage: Stage-12 + Stats-Aggr Operator + + Stage: Stage-13 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +PREHOOK: Output: default@dest3@ds=2008-04-08/hr=12 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 +INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 +INSERT OVERWRITE TABLE dest3 PARTITION(ds='2008-04-08', hr='12') SELECT src.key WHERE src.key >= 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src 
+POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Output: default@dest3@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +86 val_86 +27 val_27 +98 val_98 +66 val_66 +37 val_37 +15 val_15 +82 val_82 +17 val_17 +0 val_0 +57 val_57 +20 val_20 +92 val_92 +47 val_47 +72 val_72 +4 val_4 +35 val_35 +54 val_54 +51 val_51 +65 val_65 +83 val_83 +12 val_12 +67 val_67 +84 val_84 +58 val_58 +8 val_8 +24 val_24 +42 val_42 +0 val_0 +96 val_96 +26 val_26 +51 val_51 +43 val_43 +95 val_95 +98 val_98 +85 val_85 +77 val_77 +0 val_0 +87 val_87 +15 val_15 +72 val_72 +90 val_90 +19 val_19 +10 val_10 +5 val_5 +58 val_58 +35 val_35 +95 val_95 +11 val_11 +34 val_34 +42 val_42 +78 val_78 +76 val_76 +41 val_41 +30 val_30 +64 val_64 +76 val_76 +74 val_74 +69 val_69 +33 val_33 +70 val_70 +5 val_5 +2 val_2 +35 val_35 +80 val_80 +44 val_44 +53 val_53 +90 val_90 +12 val_12 +5 val_5 +70 val_70 +24 val_24 +70 val_70 +83 val_83 +26 val_26 +67 val_67 +18 val_18 +9 val_9 +18 val_18 +97 val_97 +84 val_84 +28 val_28 +37 val_37 +90 val_90 +97 val_97 +PREHOOK: query: SELECT dest2.* FROM dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest2.* FROM dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +165 val_165 +193 val_193 +150 val_150 +128 val_128 +146 val_146 +152 val_152 +145 val_145 +166 val_166 +153 val_153 +193 val_193 +174 val_174 +199 val_199 +174 val_174 +162 val_162 +167 val_167 +195 val_195 +113 val_113 +155 val_155 +128 val_128 +149 val_149 +129 val_129 +170 val_170 +157 val_157 +111 
val_111 +169 val_169 +125 val_125 +192 val_192 +187 val_187 +176 val_176 +138 val_138 +103 val_103 +176 val_176 +137 val_137 +180 val_180 +181 val_181 +138 val_138 +179 val_179 +172 val_172 +129 val_129 +158 val_158 +119 val_119 +197 val_197 +100 val_100 +199 val_199 +191 val_191 +165 val_165 +120 val_120 +131 val_131 +156 val_156 +196 val_196 +197 val_197 +187 val_187 +137 val_137 +169 val_169 +179 val_179 +118 val_118 +134 val_134 +138 val_138 +118 val_118 +177 val_177 +168 val_168 +143 val_143 +160 val_160 +195 val_195 +119 val_119 +149 val_149 +138 val_138 +103 val_103 +113 val_113 +167 val_167 +116 val_116 +191 val_191 +128 val_128 +193 val_193 +104 val_104 +175 val_175 +105 val_105 +190 val_190 +114 val_114 +164 val_164 +125 val_125 +164 val_164 +187 val_187 +104 val_104 +163 val_163 +119 val_119 +199 val_199 +120 val_120 +169 val_169 +178 val_178 +136 val_136 +172 val_172 +133 val_133 +175 val_175 +189 val_189 +134 val_134 +100 val_100 +146 val_146 +186 val_186 +167 val_167 +183 val_183 +152 val_152 +194 val_194 +126 val_126 +169 val_169 +PREHOOK: query: SELECT dest3.* FROM dest3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest3@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest3.* FROM dest3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest3@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest3 PARTITION(ds=2008-04-08,hr=12).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +238 2008-04-08 12 +311 2008-04-08 12 +409 2008-04-08 12 +255 2008-04-08 12 +278 2008-04-08 12 +484 2008-04-08 12 +265 2008-04-08 12 +401 2008-04-08 12 +273 2008-04-08 12 +224 2008-04-08 12 +369 2008-04-08 12 +213 2008-04-08 12 +406 2008-04-08 12 +429 2008-04-08 12 +374 2008-04-08 12 +469 2008-04-08 12 +495 2008-04-08 12 +327 2008-04-08 12 +281 2008-04-08 12 +277 2008-04-08 12 +209 2008-04-08 12 +403 2008-04-08 12 +417 2008-04-08 12 +430 2008-04-08 12 +252 2008-04-08 12 +292 2008-04-08 12 +219 2008-04-08 12 +287 2008-04-08 12 +338 2008-04-08 12 +446 2008-04-08 12 +459 2008-04-08 12 +394 2008-04-08 12 +237 2008-04-08 12 +482 2008-04-08 12 +413 2008-04-08 12 +494 2008-04-08 12 +207 2008-04-08 12 +466 2008-04-08 12 +208 2008-04-08 12 +399 2008-04-08 12 +396 2008-04-08 12 +247 2008-04-08 12 +417 2008-04-08 12 +489 2008-04-08 12 +377 2008-04-08 12 +397 2008-04-08 12 +309 2008-04-08 12 +365 2008-04-08 12 +266 2008-04-08 12 +439 2008-04-08 12 +342 2008-04-08 12 +367 2008-04-08 12 +325 2008-04-08 12 +475 2008-04-08 12 +203 2008-04-08 12 +339 2008-04-08 12 +455 2008-04-08 12 +311 2008-04-08 12 +316 2008-04-08 12 +302 2008-04-08 12 +205 2008-04-08 12 +438 2008-04-08 12 +345 2008-04-08 12 +489 2008-04-08 12 +378 2008-04-08 12 +221 2008-04-08 12 +280 2008-04-08 12 +427 2008-04-08 12 +277 2008-04-08 12 +208 2008-04-08 12 +356 2008-04-08 12 +399 2008-04-08 12 +382 2008-04-08 12 +498 2008-04-08 12 +386 2008-04-08 12 +437 2008-04-08 12 +469 2008-04-08 12 +286 2008-04-08 12 +459 2008-04-08 12 +239 2008-04-08 12 +213 2008-04-08 12 +216 2008-04-08 12 +430 2008-04-08 12 +278 2008-04-08 12 +289 2008-04-08 12 +221 
2008-04-08 12 +318 2008-04-08 12 +332 2008-04-08 12 +311 2008-04-08 12 +275 2008-04-08 12 +241 2008-04-08 12 +333 2008-04-08 12 +284 2008-04-08 12 +230 2008-04-08 12 +260 2008-04-08 12 +404 2008-04-08 12 +384 2008-04-08 12 +489 2008-04-08 12 +353 2008-04-08 12 +373 2008-04-08 12 +272 2008-04-08 12 +217 2008-04-08 12 +348 2008-04-08 12 +466 2008-04-08 12 +411 2008-04-08 12 +230 2008-04-08 12 +208 2008-04-08 12 +348 2008-04-08 12 +463 2008-04-08 12 +431 2008-04-08 12 +496 2008-04-08 12 +322 2008-04-08 12 +468 2008-04-08 12 +393 2008-04-08 12 +454 2008-04-08 12 +298 2008-04-08 12 +418 2008-04-08 12 +327 2008-04-08 12 +230 2008-04-08 12 +205 2008-04-08 12 +404 2008-04-08 12 +436 2008-04-08 12 +469 2008-04-08 12 +468 2008-04-08 12 +308 2008-04-08 12 +288 2008-04-08 12 +481 2008-04-08 12 +457 2008-04-08 12 +282 2008-04-08 12 +318 2008-04-08 12 +318 2008-04-08 12 +409 2008-04-08 12 +470 2008-04-08 12 +369 2008-04-08 12 +316 2008-04-08 12 +413 2008-04-08 12 +490 2008-04-08 12 +364 2008-04-08 12 +395 2008-04-08 12 +282 2008-04-08 12 +238 2008-04-08 12 +419 2008-04-08 12 +307 2008-04-08 12 +435 2008-04-08 12 +277 2008-04-08 12 +273 2008-04-08 12 +306 2008-04-08 12 +224 2008-04-08 12 +309 2008-04-08 12 +389 2008-04-08 12 +327 2008-04-08 12 +242 2008-04-08 12 +369 2008-04-08 12 +392 2008-04-08 12 +272 2008-04-08 12 +331 2008-04-08 12 +401 2008-04-08 12 +242 2008-04-08 12 +452 2008-04-08 12 +226 2008-04-08 12 +497 2008-04-08 12 +402 2008-04-08 12 +396 2008-04-08 12 +317 2008-04-08 12 +395 2008-04-08 12 +336 2008-04-08 12 +229 2008-04-08 12 +233 2008-04-08 12 +472 2008-04-08 12 +322 2008-04-08 12 +498 2008-04-08 12 +321 2008-04-08 12 +430 2008-04-08 12 +489 2008-04-08 12 +458 2008-04-08 12 +223 2008-04-08 12 +492 2008-04-08 12 +449 2008-04-08 12 +218 2008-04-08 12 +228 2008-04-08 12 +453 2008-04-08 12 +209 2008-04-08 12 +468 2008-04-08 12 +342 2008-04-08 12 +230 2008-04-08 12 +368 2008-04-08 12 +296 2008-04-08 12 +216 2008-04-08 12 +367 2008-04-08 12 +344 2008-04-08 12 +274 2008-04-08 12 +219 2008-04-08 12 +239 2008-04-08 12 +485 2008-04-08 12 +223 2008-04-08 12 +256 2008-04-08 12 +263 2008-04-08 12 +487 2008-04-08 12 +480 2008-04-08 12 +401 2008-04-08 12 +288 2008-04-08 12 +244 2008-04-08 12 +438 2008-04-08 12 +467 2008-04-08 12 +432 2008-04-08 12 +202 2008-04-08 12 +316 2008-04-08 12 +229 2008-04-08 12 +469 2008-04-08 12 +463 2008-04-08 12 +280 2008-04-08 12 +283 2008-04-08 12 +331 2008-04-08 12 +235 2008-04-08 12 +321 2008-04-08 12 +335 2008-04-08 12 +466 2008-04-08 12 +366 2008-04-08 12 +403 2008-04-08 12 +483 2008-04-08 12 +257 2008-04-08 12 +406 2008-04-08 12 +409 2008-04-08 12 +406 2008-04-08 12 +401 2008-04-08 12 +258 2008-04-08 12 +203 2008-04-08 12 +262 2008-04-08 12 +348 2008-04-08 12 +424 2008-04-08 12 +396 2008-04-08 12 +201 2008-04-08 12 +217 2008-04-08 12 +431 2008-04-08 12 +454 2008-04-08 12 +478 2008-04-08 12 +298 2008-04-08 12 +431 2008-04-08 12 +424 2008-04-08 12 +382 2008-04-08 12 +397 2008-04-08 12 +480 2008-04-08 12 +291 2008-04-08 12 +351 2008-04-08 12 +255 2008-04-08 12 +438 2008-04-08 12 +414 2008-04-08 12 +200 2008-04-08 12 +491 2008-04-08 12 +237 2008-04-08 12 +439 2008-04-08 12 +360 2008-04-08 12 +248 2008-04-08 12 +479 2008-04-08 12 +305 2008-04-08 12 +417 2008-04-08 12 +444 2008-04-08 12 +429 2008-04-08 12 +443 2008-04-08 12 +323 2008-04-08 12 +325 2008-04-08 12 +277 2008-04-08 12 +230 2008-04-08 12 +478 2008-04-08 12 +468 2008-04-08 12 +310 2008-04-08 12 +317 2008-04-08 12 +333 2008-04-08 12 +493 2008-04-08 12 +460 2008-04-08 12 +207 2008-04-08 12 +249 2008-04-08 12 +265 
2008-04-08 12 +480 2008-04-08 12 +353 2008-04-08 12 +214 2008-04-08 12 +462 2008-04-08 12 +233 2008-04-08 12 +406 2008-04-08 12 +454 2008-04-08 12 +375 2008-04-08 12 +401 2008-04-08 12 +421 2008-04-08 12 +407 2008-04-08 12 +384 2008-04-08 12 +256 2008-04-08 12 +384 2008-04-08 12 +379 2008-04-08 12 +462 2008-04-08 12 +492 2008-04-08 12 +298 2008-04-08 12 +341 2008-04-08 12 +498 2008-04-08 12 +458 2008-04-08 12 +362 2008-04-08 12 +285 2008-04-08 12 +348 2008-04-08 12 +273 2008-04-08 12 +281 2008-04-08 12 +344 2008-04-08 12 +469 2008-04-08 12 +315 2008-04-08 12 +448 2008-04-08 12 +348 2008-04-08 12 +307 2008-04-08 12 +414 2008-04-08 12 +477 2008-04-08 12 +222 2008-04-08 12 +403 2008-04-08 12 +400 2008-04-08 12 +200 2008-04-08 12 diff --git ql/src/test/results/clientpositive/input39.q.out ql/src/test/results/clientpositive/input39.q.out index 9a84aa5..9adbafa 100644 --- ql/src/test/results/clientpositive/input39.q.out +++ ql/src/test/results/clientpositive/input39.q.out @@ -1,6 +1,12 @@ -PREHOOK: query: create table t1(key string, value string) partitioned by (ds string) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string) PREHOOK: type: CREATETABLE -POSTHOOK: query: create table t1(key string, value string) partitioned by (ds string) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@t1 PREHOOK: query: create table t2(key string, value string) partitioned by (ds string) @@ -186,4 +192,5 @@ POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:va POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 18 -mapred.job.tracker=does.notexist.com:666 +mapreduce.framework.name=yarn +mapreduce.jobtracker.address=local diff --git ql/src/test/results/clientpositive/input39_hadoop20.q.out ql/src/test/results/clientpositive/input39_hadoop20.q.out new file mode 100644 index 0000000..5a587fc --- /dev/null +++ ql/src/test/results/clientpositive/input39_hadoop20.q.out @@ -0,0 +1,195 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + + +create table t1(key string, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t1 +PREHOOK: query: create table t2(key string, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table t2(key string, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@t2 +PREHOOK: query: insert overwrite table t1 partition (ds='1') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1@ds=1 +POSTHOOK: query: insert overwrite table t1 partition (ds='1') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1@ds=1 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table t1 
partition (ds='2') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t1@ds=2 +POSTHOOK: query: insert overwrite table t1 partition (ds='2') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t1@ds=2 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table t2 partition (ds='1') +select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@t2@ds=1 +POSTHOOK: query: insert overwrite table t2 partition (ds='1') +select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@t2@ds=1 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain +select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' +POSTHOOK: type: QUERY +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME t1)) (TOK_TABREF (TOK_TABNAME t2)) (= (. (TOK_TABLE_OR_COL t1) key) (. (TOK_TABLE_OR_COL t2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL t1) ds) '1') (= (. 
(TOK_TABLE_OR_COL t2) ds) '1'))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + Filter Operator + predicate: + expr: (((hash(rand(460476415)) & 2147483647) % 32) = 0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: ds + type: string + t2 + TableScan + alias: t2 + Filter Operator + predicate: + expr: (((hash(rand(460476415)) & 2147483647) % 32) = 0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: ds + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col2} + 1 {VALUE._col2} + handleSkewJoin: false + outputColumnNames: _col2, _col7 + Select Operator + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: bigint + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' +PREHOOK: type: QUERY +PREHOOK: Input: default@t1@ds=1 +PREHOOK: Input: default@t2@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from t1 join t2 on t1.key=t2.key where t1.ds='1' and t2.ds='1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1@ds=1 +POSTHOOK: Input: default@t2@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t1 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +18 +mapred.job.tracker=localhost:58 diff --git ql/src/test/results/clientpositive/join14.q.out ql/src/test/results/clientpositive/join14.q.out index 92e4b7e..0c03aff 100644 --- ql/src/test/results/clientpositive/join14.q.out +++ 
ql/src/test/results/clientpositive/join14.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 PREHOOK: query: EXPLAIN diff --git ql/src/test/results/clientpositive/join14_hadoop20.q.out ql/src/test/results/clientpositive/join14_hadoop20.q.out new file mode 100644 index 0000000..686f0af --- /dev/null +++ ql/src/test/results/clientpositive/join14_hadoop20.q.out @@ -0,0 +1,1887 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE dest1(c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src)) (TOK_TABREF (TOK_TABNAME srcpart)) (and (AND (= (. (TOK_TABLE_OR_COL src) key) (. (TOK_TABLE_OR_COL srcpart) key)) (= (. (TOK_TABLE_OR_COL srcpart) ds) '2008-04-08')) (> (. (TOK_TABLE_OR_COL src) key) 100)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL srcpart) value))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Filter Operator + predicate: + expr: (key > 100.0) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + srcpart + TableScan + alias: srcpart + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 +INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select dest1.* from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select dest1.* from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +104 val_104 +105 val_105 +105 val_105 +111 val_111 +111 val_111 +113 
val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +113 val_113 +114 val_114 +114 val_114 +116 val_116 +116 val_116 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +119 val_119 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +125 val_125 +126 val_126 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +129 val_129 +131 val_131 +131 val_131 +133 val_133 +133 val_133 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +134 val_134 +136 val_136 +136 val_136 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +143 val_143 +145 val_145 +145 val_145 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +149 val_149 +150 val_150 +150 val_150 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +152 val_152 +153 val_153 +153 val_153 +155 val_155 +155 val_155 +156 val_156 +156 val_156 +157 val_157 +157 val_157 +158 val_158 +158 val_158 +160 val_160 +160 val_160 +162 val_162 +162 val_162 +163 val_163 +163 val_163 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +166 val_166 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +170 val_170 +170 val_170 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +175 val_175 +175 
val_175 +175 val_175 +175 val_175 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +176 val_176 +177 val_177 +177 val_177 +178 val_178 +178 val_178 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +179 val_179 +180 val_180 +180 val_180 +181 val_181 +181 val_181 +183 val_183 +183 val_183 +186 val_186 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +189 val_189 +190 val_190 +190 val_190 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +191 val_191 +192 val_192 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +194 val_194 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +195 val_195 +196 val_196 +196 val_196 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +199 val_199 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +200 val_200 +201 val_201 +201 val_201 +202 val_202 +202 val_202 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +213 val_213 +214 val_214 +214 val_214 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +217 val_217 +218 val_218 +218 val_218 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +221 val_221 +222 val_222 +222 val_222 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +224 val_224 +226 val_226 +226 val_226 +228 val_228 +228 val_228 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 
val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +233 val_233 +235 val_235 +235 val_235 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +239 val_239 +241 val_241 +241 val_241 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +242 val_242 +244 val_244 +244 val_244 +247 val_247 +247 val_247 +248 val_248 +248 val_248 +249 val_249 +249 val_249 +252 val_252 +252 val_252 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +256 val_256 +257 val_257 +257 val_257 +258 val_258 +258 val_258 +260 val_260 +260 val_260 +262 val_262 +262 val_262 +263 val_263 +263 val_263 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +265 val_265 +266 val_266 +266 val_266 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +274 val_274 +275 val_275 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +278 val_278 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +282 val_282 +283 val_283 +283 val_283 +284 val_284 +284 val_284 +285 val_285 +285 val_285 +286 val_286 +286 val_286 +287 val_287 +287 val_287 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +288 val_288 +289 val_289 +289 val_289 +291 val_291 +291 val_291 +292 val_292 +292 val_292 +296 val_296 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +298 val_298 +302 val_302 +302 val_302 +305 val_305 +305 val_305 +306 val_306 +306 val_306 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 val_307 +307 
val_307 +307 val_307 +307 val_307 +308 val_308 +308 val_308 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +309 val_309 +310 val_310 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +322 val_322 +323 val_323 +323 val_323 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +327 val_327 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +331 val_331 +332 val_332 +332 val_332 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +333 val_333 +335 val_335 +335 val_335 +336 val_336 +336 val_336 +338 val_338 +338 val_338 +339 val_339 +339 val_339 +341 val_341 +341 val_341 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +344 val_344 +345 val_345 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +351 val_351 +351 val_351 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +353 val_353 +356 val_356 +356 val_356 +360 val_360 +360 val_360 +362 val_362 +362 val_362 +364 val_364 +364 val_364 +365 val_365 +365 val_365 +366 val_366 +366 val_366 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +367 val_367 +368 val_368 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +369 val_369 +373 val_373 +373 val_373 +374 val_374 +374 val_374 +375 
val_375 +375 val_375 +377 val_377 +377 val_377 +378 val_378 +378 val_378 +379 val_379 +379 val_379 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +386 val_386 +389 val_389 +389 val_389 +392 val_392 +392 val_392 +393 val_393 +393 val_393 +394 val_394 +394 val_394 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +399 val_399 +400 val_400 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +404 val_404 +404 val_404 +404 val_404 +404 val_404 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +409 val_409 +411 val_411 +411 val_411 +413 val_413 +413 val_413 +413 val_413 +413 val_413 +413 val_413 +413 val_413 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +414 val_414 +414 val_414 +414 val_414 +414 val_414 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +418 val_418 +419 val_419 +419 val_419 +421 val_421 +421 val_421 +424 val_424 +424 val_424 +424 val_424 +424 val_424 +424 val_424 +424 val_424 +424 val_424 +424 val_424 +427 val_427 +427 val_427 +429 val_429 +429 val_429 +429 val_429 +429 
val_429 +429 val_429 +429 val_429 +429 val_429 +429 val_429 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +431 val_431 +432 val_432 +432 val_432 +435 val_435 +435 val_435 +436 val_436 +436 val_436 +437 val_437 +437 val_437 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +438 val_438 +439 val_439 +439 val_439 +439 val_439 +439 val_439 +439 val_439 +439 val_439 +439 val_439 +439 val_439 +443 val_443 +443 val_443 +444 val_444 +444 val_444 +446 val_446 +446 val_446 +448 val_448 +448 val_448 +449 val_449 +449 val_449 +452 val_452 +452 val_452 +453 val_453 +453 val_453 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +454 val_454 +455 val_455 +455 val_455 +457 val_457 +457 val_457 +458 val_458 +458 val_458 +458 val_458 +458 val_458 +458 val_458 +458 val_458 +458 val_458 +458 val_458 +459 val_459 +459 val_459 +459 val_459 +459 val_459 +459 val_459 +459 val_459 +459 val_459 +459 val_459 +460 val_460 +460 val_460 +462 val_462 +462 val_462 +462 val_462 +462 val_462 +462 val_462 +462 val_462 +462 val_462 +462 val_462 +463 val_463 +463 val_463 +463 val_463 +463 val_463 +463 val_463 +463 val_463 +463 val_463 +463 val_463 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +466 val_466 +467 val_467 +467 val_467 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +468 val_468 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +469 val_469 +470 val_470 +470 val_470 +472 val_472 +472 val_472 +475 val_475 +475 val_475 +477 val_477 +477 val_477 +478 val_478 +478 val_478 +478 val_478 +478 val_478 +478 val_478 +478 val_478 +478 val_478 +478 val_478 +479 val_479 +479 val_479 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 
val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +480 val_480 +481 val_481 +481 val_481 +482 val_482 +482 val_482 +483 val_483 +483 val_483 +484 val_484 +484 val_484 +485 val_485 +485 val_485 +487 val_487 +487 val_487 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +489 val_489 +490 val_490 +490 val_490 +491 val_491 +491 val_491 +492 val_492 +492 val_492 +492 val_492 +492 val_492 +492 val_492 +492 val_492 +492 val_492 +492 val_492 +493 val_493 +493 val_493 +494 val_494 +494 val_494 +495 val_495 +495 val_495 +496 val_496 +496 val_496 +497 val_497 +497 val_497 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 +498 val_498 diff --git ql/src/test/results/clientpositive/leftsemijoin.q.out ql/src/test/results/clientpositive/leftsemijoin.q.out index ce81225..4425bd2 100644 --- ql/src/test/results/clientpositive/leftsemijoin.q.out +++ ql/src/test/results/clientpositive/leftsemijoin.q.out @@ -40,42 +40,42 @@ POSTHOOK: query: load data local inpath '../data/files/things2.txt' INTO TABLE t POSTHOOK: type: LOAD POSTHOOK: Output: default@things POSTHOOK: Output: default@things@ds=2011-10-24 -PREHOOK: query: SELECT name,id FROM sales +PREHOOK: query: SELECT name,id FROM sales ORDER BY name ASC, id ASC PREHOOK: type: QUERY PREHOOK: Input: default@sales #### A masked pattern was here #### -POSTHOOK: query: SELECT name,id FROM sales +POSTHOOK: query: SELECT name,id FROM sales ORDER BY name ASC, id ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@sales #### A masked pattern was here #### -Joe 2 Hank 2 -PREHOOK: query: SELECT id,name FROM things +Joe 2 +PREHOOK: query: SELECT id,name FROM things ORDER BY id ASC, name ASC PREHOOK: type: QUERY PREHOOK: Input: default@things@ds=2011-10-23 PREHOOK: Input: default@things@ds=2011-10-24 #### A masked pattern was here #### -POSTHOOK: query: SELECT id,name FROM things +POSTHOOK: query: SELECT id,name FROM things ORDER BY id ASC, name ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@things@ds=2011-10-23 POSTHOOK: Input: default@things@ds=2011-10-24 #### A masked pattern was here #### 2 Tie 2 Tie -PREHOOK: query: SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) +PREHOOK: query: SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) ORDER BY name ASC, id ASC PREHOOK: type: QUERY PREHOOK: Input: default@sales PREHOOK: Input: default@things@ds=2011-10-23 PREHOOK: Input: default@things@ds=2011-10-24 #### A masked pattern was here #### -POSTHOOK: query: SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) +POSTHOOK: query: SELECT name,id FROM sales LEFT SEMI JOIN things ON (sales.id = things.id) ORDER BY name ASC, id ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@sales POSTHOOK: Input: default@things@ds=2011-10-23 POSTHOOK: Input: default@things@ds=2011-10-24 #### A masked pattern was here #### -Joe 2 Hank 2 +Joe 2 PREHOOK: query: drop table sales PREHOOK: type: DROPTABLE PREHOOK: Input: default@sales diff --git 
ql/src/test/results/clientpositive/query_properties.q.out ql/src/test/results/clientpositive/query_properties.q.out index d0d5599..5684f0d 100644 --- ql/src/test/results/clientpositive/query_properties.q.out +++ ql/src/test/results/clientpositive/query_properties.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: select * from src a join src b on a.key = b.key limit 1 +PREHOOK: query: select * from src a join src b on a.key = b.key limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -10,8 +10,7 @@ Has Group By After Join: false Uses Script: false Has Distribute By: false Has Cluster By: false -0 val_0 0 val_0 -PREHOOK: query: select * from src group by src.key, src.value limit 1 +PREHOOK: query: select * from src group by src.key, src.value limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -23,8 +22,7 @@ Has Group By After Join: false Uses Script: false Has Distribute By: false Has Cluster By: false -0 val_0 -PREHOOK: query: select * from src order by src.key limit 1 +PREHOOK: query: select * from src order by src.key limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -36,8 +34,7 @@ Has Group By After Join: false Uses Script: false Has Distribute By: false Has Cluster By: false -0 val_0 -PREHOOK: query: select * from src sort by src.key limit 1 +PREHOOK: query: select * from src sort by src.key limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -49,8 +46,7 @@ Has Group By After Join: false Uses Script: false Has Distribute By: false Has Cluster By: false -0 val_0 -PREHOOK: query: select a.key, sum(b.value) from src a join src b on a.key = b.key group by a.key limit 1 +PREHOOK: query: select a.key, sum(b.value) from src a join src b on a.key = b.key group by a.key limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -62,8 +58,7 @@ Has Group By After Join: true Uses Script: false Has Distribute By: false Has Cluster By: false -0 0.0 -PREHOOK: query: select transform(*) using 'cat' from src limit 1 +PREHOOK: query: select transform(*) using 'cat' from src limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -75,8 +70,7 @@ Has Group By After Join: false Uses Script: true Has Distribute By: false Has Cluster By: false -238 val_238 -PREHOOK: query: select * from src distribute by src.key limit 1 +PREHOOK: query: select * from src distribute by src.key limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -88,8 +82,7 @@ Has Group By After Join: false Uses Script: false Has Distribute By: true Has Cluster By: false -238 val_238 -PREHOOK: query: select * from src cluster by src.key limit 1 +PREHOOK: query: select * from src cluster by src.key limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -101,8 +94,7 @@ Has Group By After Join: false Uses Script: false Has Distribute By: false Has Cluster By: true -0 val_0 -PREHOOK: query: select key, sum(value) from (select a.key as key, b.value as value from src a join src b on a.key = b.key) c group by key limit 1 +PREHOOK: query: select key, sum(value) from (select a.key as key, b.value as value from src a join src b on a.key = b.key) c group by key limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -114,8 +106,7 @@ Has Group By After Join: false Uses Script: false Has 
Distribute By: false Has Cluster By: false -0 0.0 -PREHOOK: query: select * from src a join src b on a.key = b.key order by a.key limit 1 +PREHOOK: query: select * from src a join src b on a.key = b.key order by a.key limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -127,8 +118,7 @@ Has Group By After Join: false Uses Script: false Has Distribute By: false Has Cluster By: false -0 val_0 0 val_0 -PREHOOK: query: select * from src a join src b on a.key = b.key distribute by a.key sort by a.key, b.value limit 1 +PREHOOK: query: select * from src a join src b on a.key = b.key distribute by a.key sort by a.key, b.value limit 0 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### @@ -140,4 +130,3 @@ Has Group By After Join: false Uses Script: false Has Distribute By: true Has Cluster By: false -0 val_0 0 val_0 diff --git ql/src/test/results/clientpositive/repair.q.out ql/src/test/results/clientpositive/repair.q.out index a05726a..fdffbf8 100644 --- ql/src/test/results/clientpositive/repair.q.out +++ ql/src/test/results/clientpositive/repair.q.out @@ -1,6 +1,10 @@ -PREHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) PREHOOK: type: CREATETABLE -POSTHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@repairtable PREHOOK: query: MSCK TABLE repairtable diff --git ql/src/test/results/clientpositive/repair_hadoop20.q.out ql/src/test/results/clientpositive/repair_hadoop20.q.out new file mode 100644 index 0000000..2ae1a6c --- /dev/null +++ ql/src/test/results/clientpositive/repair_hadoop20.q.out @@ -0,0 +1,29 @@ +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@repairtable +PREHOOK: query: MSCK TABLE repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK TABLE repairtable +POSTHOOK: type: MSCK +PREHOOK: query: MSCK TABLE repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK TABLE repairtable +POSTHOOK: type: MSCK +Partitions not in metastore: repairtable:p1=a/p2=a repairtable:p1=b/p2=a +PREHOOK: query: MSCK REPAIR TABLE repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK REPAIR TABLE repairtable +POSTHOOK: type: MSCK +Partitions not in metastore: repairtable:p1=a/p2=a repairtable:p1=b/p2=a +Repair: Added partition to metastore repairtable:p1=a/p2=a +Repair: Added partition to metastore repairtable:p1=b/p2=a +PREHOOK: query: MSCK TABLE repairtable +PREHOOK: type: MSCK +POSTHOOK: query: MSCK TABLE repairtable +POSTHOOK: type: MSCK diff --git ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out index 9f35f2a..479458a 100644 --- ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out +++ ql/src/test/results/clientpositive/sample_islocalmode_hook.q.out @@ -10,10 +10,14 @@ PREHOOK: query: drop table if exists sih_src2 PREHOOK: type: DROPTABLE POSTHOOK: 
query: drop table if exists sih_src2 POSTHOOK: type: DROPTABLE -PREHOOK: query: -- create file inputs +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +-- create file inputs create table sih_i_part (key int, value string) partitioned by (p string) PREHOOK: type: CREATETABLE -POSTHOOK: query: -- create file inputs +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +-- create file inputs create table sih_i_part (key int, value string) partitioned by (p string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@sih_i_part @@ -83,25 +87,31 @@ POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchem POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- sample split, running locally limited by num tasks +PREHOOK: query: -- Relaxing hive.exec.mode.local.auto.input.files.max=1. +-- Hadoop20 will not generate more splits than there are files (one). +-- Hadoop23 generate splits correctly (four), hence the max needs to be adjusted to ensure running in local mode. +-- Default value is hive.exec.mode.local.auto.input.files.max=4 which produces expected behavior on Hadoop23. +-- hive.sample.seednumber is required because Hadoop23 generates multiple splits and tablesample is non-repeatable without it. + +-- sample split, running locally limited by num tasks select count(1) from sih_src tablesample(1 percent) PREHOOK: type: QUERY PREHOOK: Input: default@sih_src #### A masked pattern was here #### -1500 +25 PREHOOK: query: -- sample two tables -select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key +select count(1) from sih_src tablesample(1 percent) a join sih_src2 tablesample(1 percent) b on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@sih_src PREHOOK: Input: default@sih_src2 #### A masked pattern was here #### -3084 +49 PREHOOK: query: -- sample split, running locally limited by max bytes select count(1) from sih_src tablesample(1 percent) PREHOOK: type: QUERY PREHOOK: Input: default@sih_src #### A masked pattern was here #### -1500 +25 PREHOOK: query: drop table sih_i_part PREHOOK: type: DROPTABLE PREHOOK: Input: default@sih_i_part diff --git ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out new file mode 100644 index 0000000..fe5a156 --- /dev/null +++ ql/src/test/results/clientpositive/sample_islocalmode_hook_hadoop20.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: drop table if exists sih_i_part +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists sih_i_part +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists sih_src +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists sih_src +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists sih_src2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists sih_src2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +-- create file inputs +create table sih_i_part (key int, value string) partitioned by (p string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +-- create file inputs +create table 
sih_i_part (key int, value string) partitioned by (p string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@sih_i_part +PREHOOK: query: insert overwrite table sih_i_part partition (p='1') select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@sih_i_part@p=1 +POSTHOOK: query: insert overwrite table sih_i_part partition (p='1') select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@sih_i_part@p=1 +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table sih_i_part partition (p='2') select key+10000, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@sih_i_part@p=2 +POSTHOOK: query: insert overwrite table sih_i_part partition (p='2') select key+10000, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@sih_i_part@p=2 +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table sih_i_part partition (p='3') select key+20000, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@sih_i_part@p=3 +POSTHOOK: query: insert overwrite table sih_i_part partition (p='3') select key+20000, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@sih_i_part@p=3 +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table sih_src as select key, value from sih_i_part order by key, value +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@sih_i_part@p=1 +PREHOOK: Input: default@sih_i_part@p=2 +PREHOOK: Input: default@sih_i_part@p=3 +POSTHOOK: query: create table sih_src as select key, value from sih_i_part order by key, value +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@sih_i_part@p=1 +POSTHOOK: Input: default@sih_i_part@p=2 +POSTHOOK: Input: default@sih_i_part@p=3 +POSTHOOK: Output: default@sih_src +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table sih_src2 as select key, value from sih_src order by key, value +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@sih_src +POSTHOOK: query: create table sih_src2 as select key, value from sih_src order by key, value +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@sih_src +POSTHOOK: Output: default@sih_src2 +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: sih_i_part PARTITION(p=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- sample split, running locally limited by num tasks +select count(1) from sih_src tablesample(1 percent) +PREHOOK: type: QUERY +PREHOOK: Input: default@sih_src +#### A masked pattern was here #### +1500 +PREHOOK: query: -- sample two tables +select count(1) from sih_src tablesample(1 percent)a join sih_src2 tablesample(1 percent)b on a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@sih_src +PREHOOK: Input: default@sih_src2 +#### A masked pattern was here #### +3084 +PREHOOK: query: -- sample split, running locally limited by max bytes +select count(1) from sih_src tablesample(1 percent) +PREHOOK: type: QUERY +PREHOOK: Input: default@sih_src +#### A masked pattern was here #### +1500 +PREHOOK: query: drop table sih_i_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@sih_i_part +PREHOOK: Output: default@sih_i_part +PREHOOK: query: drop table sih_src +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@sih_src +PREHOOK: Output: default@sih_src +PREHOOK: query: drop table sih_src2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@sih_src2 +PREHOOK: Output: default@sih_src2 diff --git ql/src/test/results/clientpositive/split_sample.q.out ql/src/test/results/clientpositive/split_sample.q.out index cc622c8..fc5fc82 100644 --- ql/src/test/results/clientpositive/split_sample.q.out +++ ql/src/test/results/clientpositive/split_sample.q.out @@ -26,10 +26,26 @@ PREHOOK: query: drop table ss_t5 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table ss_t5 POSTHOOK: type: DROPTABLE -PREHOOK: query: -- create multiple file inputs (two enable multiple splits) +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. 
+-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create multiple file inputs (two enable multiple splits) create table ss_i_part (key int, value string) partitioned by (p string) PREHOOK: type: CREATETABLE -POSTHOOK: query: -- create multiple file inputs (two enable multiple splits) +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) +-- This test sets mapred.max.split.size=300 and hive.merge.smallfiles.avgsize=1 +-- in an attempt to force the generation of multiple splits and multiple output files. +-- However, Hadoop 0.20 is incapable of generating splits smaller than the block size +-- when using CombineFileInputFormat, so only one split is generated. This has a +-- significant impact on the results of the TABLESAMPLE(x PERCENT). This issue was +-- fixed in MAPREDUCE-2046 which is included in 0.22. + +-- create multiple file inputs (two enable multiple splits) create table ss_i_part (key int, value string) partitioned by (p string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@ss_i_part @@ -406,7 +422,7 @@ STAGE PLANS: Percentage Sample: ss_src2 percentage: 70.0 - seed number: 5 + seed number: 7 Reduce Operator Tree: Group By Operator aggregations: @@ -523,7 +539,7 @@ STAGE PLANS: Percentage Sample: subq:ss_src2 percentage: 1.0 - seed number: 5 + seed number: 7 Reduce Operator Tree: Extract Limit @@ -4098,10 +4114,10 @@ STAGE PLANS: Percentage Sample: subq:t1 percentage: 80.0 - seed number: 5 + seed number: 7 subq:t2 percentage: 2.0 - seed number: 5 + seed number: 7 Reduce Operator Tree: Join Operator condition map: @@ -4242,7 +4258,7 @@ STAGE PLANS: Percentage Sample: ss_src2 percentage: 1.0 - seed number: 5 + seed number: 7 Reduce Operator Tree: Group By Operator aggregations: diff --git service/src/test/org/apache/hadoop/hive/service/TestHiveServerSessions.java service/src/test/org/apache/hadoop/hive/service/TestHiveServerSessions.java index 8c3fe34..fd38907 100644 --- service/src/test/org/apache/hadoop/hive/service/TestHiveServerSessions.java +++ service/src/test/org/apache/hadoop/hive/service/TestHiveServerSessions.java @@ -52,7 +52,7 @@ public class TestHiveServerSessions extends TestCase { } }); server.start(); - Thread.sleep(1000); + Thread.sleep(5000); for (int i = 0; i < transports.length ; i++) { TSocket transport = new TSocket("localhost", port); diff --git shims/build.xml shims/build.xml index 906abed..d936d1f 100644 --- shims/build.xml +++ shims/build.xml @@ -40,17 +40,22 @@ to call at top-level: ant deploy-contrib compile-core-test - - - - - - - - - - - + + + + + + + + + + + + + + + + @@ -95,47 +100,119 @@ to call at top-level: ant deploy-contrib compile-core-test - + - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git shims/ivy.xml shims/ivy.xml index 15bbf45..cb33dea 100644 --- shims/ivy.xml +++ shims/ivy.xml @@ -38,16 +38,17 @@ transitive="false"/> - + - + + @@ -73,8 +74,26 @@ + + + + + + + + + + + + + @@ -98,6 +117,8 @@ + diff --git shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java index 61d0388..e6dcd88 100644 --- 
shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java +++ shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java @@ -23,6 +23,8 @@ import java.io.IOException; import java.lang.reflect.Constructor; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; +import java.net.MalformedURLException; +import java.net.URL; import java.util.ArrayList; import java.util.List; @@ -51,6 +53,7 @@ import org.apache.hadoop.mapred.RunningJob; import org.apache.hadoop.mapred.TaskAttemptContext; import org.apache.hadoop.mapred.TaskCompletionEvent; import org.apache.hadoop.mapred.TaskID; +import org.apache.hadoop.mapred.TaskLogServlet; import org.apache.hadoop.mapred.lib.CombineFileInputFormat; import org.apache.hadoop.mapred.lib.CombineFileSplit; import org.apache.hadoop.mapreduce.Job; @@ -526,6 +529,17 @@ public class Hadoop20Shims implements HadoopShims { } @Override + public String getTaskAttemptLogUrl(JobConf conf, + String taskTrackerHttpAddress, String taskAttemptId) + throws MalformedURLException { + URL taskTrackerHttpURL = new URL(taskTrackerHttpAddress); + return TaskLogServlet.getTaskLogUrl( + taskTrackerHttpURL.getHost(), + Integer.toString(taskTrackerHttpURL.getPort()), + taskAttemptId); + } + + @Override public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception { JobTrackerState state; switch (clusterStatus.getJobTrackerState()) { diff --git shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java index 1507981..c2d0310 100644 --- shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java +++ shims/src/0.20S/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java @@ -17,10 +17,15 @@ */ package org.apache.hadoop.hive.shims; +import java.net.MalformedURLException; +import java.net.URL; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.shims.HadoopShimsSecure; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.TaskLogServlet; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.util.Progressable; @@ -30,6 +35,17 @@ import org.apache.hadoop.util.Progressable; public class Hadoop20SShims extends HadoopShimsSecure { @Override + public String getTaskAttemptLogUrl(JobConf conf, + String taskTrackerHttpAddress, String taskAttemptId) + throws MalformedURLException { + URL taskTrackerHttpURL = new URL(taskTrackerHttpAddress); + return TaskLogServlet.getTaskLogUrl( + taskTrackerHttpURL.getHost(), + Integer.toString(taskTrackerHttpURL.getPort()), + taskAttemptId); + } + + @Override public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception { JobTrackerState state; switch (clusterStatus.getJobTrackerState()) { diff --git shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java index 848f635..413bbd7 100644 --- shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java +++ shims/src/0.23/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java @@ -17,14 +17,18 @@ */ package org.apache.hadoop.hive.shims; +import java.net.MalformedURLException; +import java.net.URL; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.shims.HadoopShims.JobTrackerState; -import org.apache.hadoop.hive.shims.HadoopShimsSecure; import 
org.apache.hadoop.mapred.ClusterStatus; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.task.JobContextImpl; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; +import org.apache.hadoop.mapreduce.util.HostUtil; import org.apache.hadoop.util.Progressable; /** @@ -33,6 +37,24 @@ import org.apache.hadoop.util.Progressable; public class Hadoop23Shims extends HadoopShimsSecure { @Override + public String getTaskAttemptLogUrl(JobConf conf, + String taskTrackerHttpAddress, String taskAttemptId) + throws MalformedURLException { + if (conf.get("mapreduce.framework.name") != null + && conf.get("mapreduce.framework.name").equals("yarn")) { + // if the cluster is running in MR2 mode, return null + //LOG.warn("Can't fetch tasklog: TaskLogServlet is not supported in MR2 mode."); + return null; + } else { + // if the cluster is running in MR1 mode, using HostUtil to construct TaskLogURL + URL taskTrackerHttpURL = new URL(taskTrackerHttpAddress); + return HostUtil.getTaskLogUrl(taskTrackerHttpURL.getHost(), + Integer.toString(taskTrackerHttpURL.getPort()), + taskAttemptId); + } + } + + @Override public JobTrackerState getJobTrackerState(ClusterStatus clusterStatus) throws Exception { JobTrackerState state; switch (clusterStatus.getJobTrackerStatus()) { diff --git shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java index 0cc4aba..437c94f 100644 --- shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java +++ shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.shims; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.net.MalformedURLException; import java.security.PrivilegedExceptionAction; import java.util.List; @@ -66,6 +67,17 @@ public interface HadoopShims { boolean usesJobShell(); /** + * Constructs and Returns TaskAttempt Log Url + * or null if the TaskLogServlet is not available + * + * @return TaskAttempt Log Url + */ + String getTaskAttemptLogUrl(JobConf conf, + String taskTrackerHttpAddress, + String taskAttemptId) + throws MalformedURLException; + + /** * Return true if the job has not switched to RUNNING state yet * and is still in PREP state */ diff --git shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java index ec2e44f..4275a2c 100644 --- shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java +++ shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.shims; +import java.lang.IllegalArgumentException; import java.util.HashMap; import java.util.Map; @@ -116,8 +117,16 @@ public abstract class ShimLoader { throw new RuntimeException("Illegal Hadoop Version: " + vers + " (expected A.B.* format)"); } - if (Integer.parseInt(parts[0]) > 0){ - return "0.20S"; + + switch (Integer.parseInt(parts[0])) { + case 0: + break; + case 1: + return "0.20S"; + case 2: + return "0.23"; + default: + throw new IllegalArgumentException("Unrecognized Hadoop major version number: " + vers); } String majorVersion = parts[0] + "." + parts[1];
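The version-to-shim mapping that the ShimLoader change above introduces can be read in isolation as follows. This is a minimal, standalone sketch for illustration only: the class name VersionToShimSketch and the method majorVersionFor are hypothetical and not part of the patch; the control flow mirrors the switch shown above, assuming Hadoop version strings in A.B.* form.

public class VersionToShimSketch {
  // Hypothetical stand-in for ShimLoader.getMajorVersion(): maps a Hadoop
  // version string to the identifier used to pick a HadoopShims implementation.
  static String majorVersionFor(String vers) {
    String[] parts = vers.split("\\.");
    if (parts.length < 2) {
      throw new RuntimeException("Illegal Hadoop Version: " + vers
          + " (expected A.B.* format)");
    }
    switch (Integer.parseInt(parts[0])) {
      case 0:
        // 0.x keeps its major.minor identifier, e.g. "0.20" or "0.23"
        return parts[0] + "." + parts[1];
      case 1:
        // 1.x builds against the secure 0.20 shim
        return "0.20S";
      case 2:
        // 2.x builds against the 0.23 (YARN) shim
        return "0.23";
      default:
        throw new IllegalArgumentException(
            "Unrecognized Hadoop major version number: " + vers);
    }
  }

  public static void main(String[] args) {
    System.out.println(majorVersionFor("0.20.2")); // 0.20
    System.out.println(majorVersionFor("1.0.0"));  // 0.20S
    System.out.println(majorVersionFor("0.23.3")); // 0.23
    System.out.println(majorVersionFor("2.0.0"));  // 0.23
  }
}

Under this mapping, the returned identifier selects among the Hadoop20Shims, Hadoop20SShims, and Hadoop23Shims implementations shown above.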