diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
index c5e027b..af8ec67 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java
@@ -479,6 +479,7 @@ public SparkOnYarnCliConfig() {
         setQueryDir("ql/src/test/queries/clientpositive");
 
         includesFrom(testConfigProps, "miniSparkOnYarn.query.files");
+        includesFrom(testConfigProps, "spark.only.query.files");
 
         setResultsDir("ql/src/test/results/clientpositive/spark");
         setLogDir("itests/qtest-spark/target/qfile-results/clientpositive/spark");
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 108c4e6..e204f3d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -2501,6 +2501,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc,
       case EXTRACT:
       case EVENT:
       case HASHTABLESINK:
+      case SPARKPRUNINGSINK:
         vectorOp = OperatorFactory.getVectorOperator(
             op.getCompilationOpContext(), op.getConf(), vContext);
         break;
diff --git a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q
index 8b83ef6..3b6810b 100644
--- a/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning.q
@@ -1,58 +1,58 @@
-set hive.support.sql11.reserved.keywords=false;
 set hive.optimize.ppd=true;
 set hive.ppd.remove.duplicatefilters=true;
 set hive.spark.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
+set hive.strict.checks.cartesian.product=false;
 
 -- SORT_QUERY_RESULTS
 
 select distinct ds from srcpart;
 select distinct hr from srcpart;
 
-EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds;
-create table srcpart_date as select ds as ds, ds as date from srcpart group by ds;
+EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds;
+create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds;
 create table srcpart_hour as select hr as hr, hr as hour from srcpart group by hr;
-create table srcpart_date_hour as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr;
+create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr;
 create table srcpart_double_hour as select (hr*2) as hr, hr as hour from srcpart group by hr;
 
 -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 set hive.spark.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 set hive.spark.dynamic.partition.pruning=true;
 
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- multiple sources, single key
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 
 set hive.spark.dynamic.partition.pruning=false;
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 set hive.spark.dynamic.partition.pruning=true;
 
 select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
 
 -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
-select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 
 set hive.spark.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
-select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 set hive.spark.dynamic.partition.pruning=true;
 
 select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
 
 -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 
 set hive.spark.dynamic.partition.pruning=false;
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 set hive.spark.dynamic.partition.pruning=true;
 
 select count(*) from srcpart where ds = 'I DONT EXIST';
@@ -75,34 +75,34 @@ select count(*) from srcpart where cast(hr as string) = 11;
 
 -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
-select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
+select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- non-equi join
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
-select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
+select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr);
 
 -- old style join syntax
-EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
-select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
+EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
+select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr;
 
 -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 -- with static pruning
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
 
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 
 -- union + subquery
 EXPLAIN select count(*) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
@@ -117,26 +117,26 @@ set hive.auto.convert.join.noconditionaltask = true;
 set hive.auto.convert.join.noconditionaltask.size = 10000000;
 
 -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- multiple sources, single key
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11;
 select count(*) from srcpart where hr = 11 and ds = '2008-04-08';
 
 -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
-select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11;
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
+select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11;
 select count(*) from srcpart where ds = '2008-04-08' and hr = 11;
 
 -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 -- Disabled until TEZ-1486 is fixed
--- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 
 -- expressions
 EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11;
@@ -146,27 +146,27 @@ select count(*) from srcpart join srcpart_double_hour on (srcpart.hr*2 = srcpart
 select count(*) from srcpart where hr = 11;
 
 -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
-select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
+select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08';
 select count(*) from srcpart where ds = '2008-04-08';
 
 -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
-EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
+EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08';
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08';
 
 -- with static pruning
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
 select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11;
 
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 -- Disabled until TEZ-1486 is fixed
 -- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
--- where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+-- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 
 -- union + subquery
 EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart);
diff --git a/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q b/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q
index 293fcfc..791c3a9 100644
--- a/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q
+++ b/ql/src/test/queries/clientpositive/spark_vectorized_dynamic_partition_pruning.q
@@ -4,6 +4,7 @@ set hive.spark.dynamic.partition.pruning=true;
 set hive.optimize.metadataonly=false;
 set hive.optimize.index.filter=true;
 set hive.vectorized.execution.enabled=true;
+set hive.strict.checks.cartesian.product=false;
 
 select distinct ds from srcpart;
 
diff --git a/ql/src/test/results/clientpositive/spark/spark_combine_equivalent_work.q.out b/ql/src/test/results/clientpositive/spark/spark_combine_equivalent_work.q.out
index 93d07d6..b7116c1 100644
--- a/ql/src/test/results/clientpositive/spark/spark_combine_equivalent_work.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_combine_equivalent_work.q.out
@@ -30,11 +30,9 @@ POSTHOOK: Output: default@a1
 POSTHOOK: Output: default@a1@end_dt=20161021
 PREHOOK: query: insert into table a1 partition(END_DT='20161020') values('2000721360','20161001')
 PREHOOK: type: QUERY
-PREHOOK: Input: default@values__tmp__table__1
 PREHOOK: Output: default@a1@end_dt=20161020
 POSTHOOK: query: insert into table a1 partition(END_DT='20161020') values('2000721360','20161001')
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@values__tmp__table__1
 POSTHOOK: Output: default@a1@end_dt=20161020
 POSTHOOK: Lineage: a1 PARTITION(end_dt=20161020).kehhao SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
 POSTHOOK: Lineage: a1 PARTITION(end_dt=20161020).start_dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
index 16aa452..fc6edb4 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out
@@ -38,9 +38,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 11
 12
-PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds
+PREHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds
 PREHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as date from srcpart group by ds
+POSTHOOK: query: EXPLAIN create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds
 POSTHOOK: type: CREATETABLE_AS_SELECT
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -112,7 +112,7 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
-PREHOOK: query: create table srcpart_date as select ds as ds, ds as date from srcpart group by ds
+PREHOOK: query: create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -121,7 +121,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Output: database:default
 PREHOOK: Output: default@srcpart_date
-POSTHOOK: query: create table srcpart_date as select ds as ds, ds as date from srcpart group by ds
+POSTHOOK: query: create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds
 POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -130,6 +130,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Lineage: srcpart_date.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
 PREHOOK: query: create table srcpart_hour as select hr as hr, hr as hour from srcpart group by hr
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@srcpart
@@ -148,7 +150,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_hour
-PREHOOK: query: create table srcpart_date_hour as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr
+POSTHOOK: Lineage: srcpart_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+PREHOOK: query: create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -157,7 +161,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Output: database:default
 PREHOOK: Output: default@srcpart_date_hour
-POSTHOOK: query: create table srcpart_date_hour as select ds as ds, ds as date, hr as hr, hr as hour from srcpart group by ds, hr
+POSTHOOK: query: create table srcpart_date_hour as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr
 POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -166,6 +170,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_date_hour
+POSTHOOK: Lineage: srcpart_date_hour.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 PREHOOK: query: create table srcpart_double_hour as select (hr*2) as hr, hr as hour from srcpart group by hr
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@srcpart
@@ -184,11 +192,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_double_hour
+POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 PREHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -204,10 +214,10 @@ STAGE PLANS:
         Map Operator Tree:
             TableScan
              alias: srcpart_date
-             filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+               predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
@@ -254,10 +264,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date
-             filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+               predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
@@ -297,8 +307,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -307,7 +317,7 @@ STAGE PLANS:
     Processor Tree:
       ListSink
 
-PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -316,7 +326,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Input: default@srcpart_date
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -326,9 +336,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Input: default@srcpart_date
 #### A masked pattern was here ####
 1000
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -361,10 +371,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date
-             filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+               predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
@@ -404,8 +414,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -414,7 +424,7 @@ STAGE PLANS:
     Processor Tree:
      ListSink
 
-PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -423,7 +433,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Input: default@srcpart_date
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -436,23 +446,19 @@ POSTHOOK: Input: default@srcpart_date
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: -- multiple sources, single key
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- multiple sources, single key
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -468,10 +474,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date
-             filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+               predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
@@ -495,10 +501,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_hour
-             filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: hr (type: string)
@@ -546,10 +552,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date
-             filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+               predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
@@ -564,10 +570,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_hour
-             filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: hr (type: string)
@@ -622,8 +628,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -633,7 +639,7 @@ STAGE PLANS:
      ListSink
 
 PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -644,7 +650,7 @@ PREHOOK: Input: default@srcpart_date
 PREHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -656,10 +662,10 @@ POSTHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 500
 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -694,10 +700,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date
-             filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+               predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
@@ -712,10 +718,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_hour
-             filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: hr (type: string)
@@ -770,8 +776,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -781,7 +787,7 @@ STAGE PLANS:
      ListSink
 
 PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -792,7 +798,7 @@ PREHOOK: Input: default@srcpart_date
 PREHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -806,19 +812,17 @@ POSTHOOK: Input: default@srcpart_hour
 PREHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -834,10 +838,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date_hour
-             filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
              Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string), hr (type: string)
@@ -861,10 +865,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date_hour
-             filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
              Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string), hr (type: string)
@@ -910,10 +914,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date_hour
-             filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
              Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string), hr (type: string)
@@ -953,8 +957,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -963,7 +967,7 @@ STAGE PLANS:
    Processor Tree:
      ListSink
 
-PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -972,7 +976,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Input: default@srcpart_date_hour
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -982,9 +986,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Input: default@srcpart_date_hour
 #### A masked pattern was here ####
 500
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1017,10 +1021,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date_hour
-             filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
              Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string), hr (type: string)
@@ -1060,8 +1064,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1070,7 +1074,7 @@ STAGE PLANS:
    Processor Tree:
      ListSink
 
-PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1079,7 +1083,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Input: default@srcpart_date_hour
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1092,19 +1096,17 @@ POSTHOOK: Input: default@srcpart_date_hour
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -1120,10 +1122,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date
-             filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+             filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+               predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
@@ -1170,10 +1172,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date
-             filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+             filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+               predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
@@ -1213,8 +1215,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1223,7 +1225,7 @@ STAGE PLANS:
    Processor Tree:
      ListSink
 
-PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1232,7 +1234,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Input: default@srcpart_date
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1242,9 +1244,9 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Input: default@srcpart_date
 #### A masked pattern was here ####
 0
-PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1277,10 +1279,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_date
-             filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+             filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+               predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
@@ -1320,8 +1322,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1330,7 +1332,7 @@ STAGE PLANS:
    Processor Tree:
      ListSink
 
-PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1339,7 +1341,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Input: default@srcpart_date
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -1378,10 +1380,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_double_hour
-             filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: hr (type: double)
@@ -1428,10 +1430,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_double_hour
-             filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: hr (type: double)
@@ -1471,8 +1473,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1518,10 +1520,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_double_hour
-             filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: hr (type: double)
@@ -1560,18 +1562,18 @@ STAGE PLANS:
                    outputColumnNames: _col0
                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
-                     key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                     key expressions: (UDFToDouble(_col0) * 2.0) (type: double)
                      sort order: +
-                     Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                     Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double)
                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                  Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                    Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: hr (type: double)
@@ -1588,7 +1590,7 @@ STAGE PLANS:
                condition map:
                     Inner Join 0 to 1
                keys:
-                 0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                 0 (UDFToDouble(_col0) * 2.0) (type: double)
                  1 _col0 (type: double)
                Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
@@ -1611,8 +1613,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1675,10 +1677,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_double_hour
-             filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: hr (type: double)
@@ -1718,8 +1720,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1774,18 +1776,18 @@ STAGE PLANS:
                    outputColumnNames: _col0
                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
-                     key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                     key expressions: (UDFToDouble(_col0) * 2.0) (type: double)
                      sort order: +
-                     Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                     Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double)
                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                  Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                    Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
                      expressions: hr (type: double)
@@ -1802,7 +1804,7 @@ STAGE PLANS:
                condition map:
                     Inner Join 0 to 1
                keys:
-                 0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                 0 (UDFToDouble(_col0) * 2.0) (type: double)
                  1 _col0 (type: double)
                Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
@@ -1825,8 +1827,8 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                table:
-                   input format: org.apache.hadoop.mapred.TextInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1857,14 +1859,10 @@ POSTHOOK: Input: default@srcpart_double_hour
 PREHOOK: query: select count(*) from srcpart where hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11
@@ -1885,10 +1883,10 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: srcpart_double_hour
-             filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+             filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
              Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
              Filter Operator
-               predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+               predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: hr (type: double)
@@ -1927,18 +1925,18 @@ STAGE PLANS:
                    outputColumnNames: _col0
                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
-                     key expressions: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+                     key expressions: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
                      sort order: +
-                     Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+                     Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
        Map 4 
            Map Operator Tree:
                TableScan
                  alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
(type: boolean) Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hr (type: double) @@ -1955,7 +1953,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string) + 0 UDFToString((UDFToDouble(_col0) * 2.0)) (type: string) 1 UDFToString(_col0) (type: string) Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1978,8 +1976,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2010,99 +2008,43 @@ POSTHOOK: Input: default@srcpart_double_hour PREHOOK: query: select count(*) from srcpart where cast(hr as string) = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where cast(hr as string) = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08' +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- parent is reduce tasks -EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08' +EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 - Spark - Edges: - Reducer 7 <- Map 6 (GROUP, 2) -#### A masked pattern was here #### - Vertices: - Map 6 - Map Operator Tree: - TableScan - alias: srcpart - filterExpr: (ds = '2008-04-08') (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '2008-04-08' (type: string) - outputColumnNames: ds - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: ds (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - 
sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 7 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Stage: Stage-1 Spark Edges: - Reducer 5 <- Map 4 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + filterExpr: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -2110,18 +2052,16 @@ STAGE PLANS: filterExpr: (ds = '2008-04-08') (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: '2008-04-08' (type: string) - outputColumnNames: ds Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: ds (type: string) + keys: '2008-04-08' (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: '2008-04-08' (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: '2008-04-08' (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -2129,9 +2069,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2152,21 +2092,21 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: '2008-04-08' (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2174,42 +2114,35 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08' +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08' +POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 1000 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = 
srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) PREHOOK: type: QUERY POSTHOOK: query: -- non-equi join -EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2262,12 +2195,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 77248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 = _col2) or (_col1 = _col4)) (type: boolean) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 77248 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 77248 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2288,8 +2221,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2298,8 +2231,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -2308,7 +2241,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) +POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -2319,10 +2252,10 @@ POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 1500 PREHOOK: query: -- old style join 
syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr PREHOOK: type: QUERY POSTHOOK: query: -- old style join syntax -EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +EXPLAIN select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2457,8 +2390,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2467,7 +2400,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -2476,7 +2409,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr +POSTHOOK: query: select count(*) from srcpart, srcpart_date_hour where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11 and srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -2487,10 +2420,10 @@ POSTHOOK: Input: default@srcpart_date_hour #### A masked pattern was here #### 500 PREHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- left join -EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: 
QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2599,8 +2532,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2609,9 +2542,9 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2719,8 +2652,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2730,10 +2663,10 @@ STAGE PLANS: ListSink PREHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' PREHOOK: type: QUERY POSTHOOK: query: -- full outer -EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08' +EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2841,8 +2774,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2853,11 +2786,11 @@ STAGE PLANS: PREHOOK: query: -- with static pruning EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY POSTHOOK: query: -- with static pruning EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = 
srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage @@ -2873,10 +2806,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -2911,22 +2844,23 @@ STAGE PLANS: alias: srcpart Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 + expressions: ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -2947,11 +2881,13 @@ STAGE PLANS: predicate: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: hr (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '11' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '11' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -2961,11 +2897,12 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '11' (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: '11' (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: @@ -2996,8 +2933,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -3007,7 +2944,7 @@ STAGE PLANS: ListSink PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -3016,7 +2953,7 @@ PREHOOK: Input: default@srcpart_date PREHOOK: Input: default@srcpart_hour #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 +where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 @@ -3026,10 +2963,10 @@ POSTHOOK: Input: default@srcpart_hour #### A masked pattern was here #### 500 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart.hr = 13 +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart.hr = 13 +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -3048,28 +2985,29 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean) + filterExpr: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean) + predicate: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 + expressions: ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num 
rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -3090,11 +3028,13 @@ STAGE PLANS: predicate: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: hr (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '13' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '13' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -3104,11 +3044,12 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: '13' (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: '13' (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reducer 3 Reduce Operator Tree: @@ -3139,8 +3080,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -3150,14 +3091,14 @@ STAGE PLANS: ListSink PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart.hr = 13 +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart_date PREHOOK: Input: default@srcpart_hour #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr) -where srcpart_date.date = '2008-04-08' and srcpart.hr = 13 +where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13 POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart_date @@ -3179,11 +3120,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 1) - Reducer 9 <- Map 8 (GROUP, 1) + Reducer 10 <- Map 9 (GROUP, 1) + Reducer 11 <- Reducer 10 (GROUP, 2), Reducer 13 (GROUP, 2) + Reducer 13 <- Map 12 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 12 Map Operator Tree: TableScan alias: srcpart @@ -3196,12 +3138,12 @@ STAGE PLANS: aggregations: min(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 8 + Map 9 Map Operator Tree: TableScan alias: srcpart @@ -3214,84 
+3156,81 @@ STAGE PLANS: aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 11 Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Reducer 9 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + target column name: ds + target work: Map 1 + Reducer 13 Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 
Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 Spark Edges: - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 6 <- Reducer 5 (GROUP, 2), Reducer 8 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -3315,12 +3254,12 @@ STAGE PLANS: aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 + Map 7 Map Operator Tree: TableScan alias: srcpart @@ -3333,16 +3272,16 @@ STAGE PLANS: aggregations: min(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3367,8 +3306,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: @@ -3376,42 +3315,48 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - 
outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reducer 8 Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - - Stage: Stage-0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: @@ -3447,11 +3392,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 1) - Reducer 9 <- Map 8 (GROUP, 1) + Reducer 10 <- Map 9 (GROUP, 1) + Reducer 11 <- Reducer 10 (GROUP, 2), Reducer 13 (GROUP, 2) + Reducer 13 <- Map 12 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 12 Map Operator Tree: TableScan alias: srcpart @@ -3464,12 +3410,12 @@ STAGE PLANS: aggregations: min(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 8 + Map 9 Map Operator Tree: TableScan alias: srcpart @@ -3482,84 +3428,81 @@ STAGE PLANS: aggregations: max(ds) mode: hash outputColumnNames: _col0 - 
Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 11 Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Reducer 9 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + target column name: ds + target work: Map 1 + Reducer 13 Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - 
                          partition key expr: ds
-                          Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                          target column name: ds
-                          target work: Map 1
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 5 <- Map 4 (GROUP, 1)
-        Reducer 7 <- Map 6 (GROUP, 1)
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
         Reducer 3 <- Reducer 2 (GROUP, 2)
+        Reducer 5 <- Map 4 (GROUP, 1)
+        Reducer 6 <- Reducer 5 (GROUP, 2), Reducer 8 (GROUP, 2)
+        Reducer 8 <- Map 7 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
             Map Operator Tree:
                 TableScan
                   alias: srcpart
-                  filterExpr: ds is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ds (type: string)
@@ -3583,12 +3526,12 @@ STAGE PLANS:
                       aggregations: max(ds)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string)
-        Map 6
+        Map 7
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -3601,16 +3544,16 @@ STAGE PLANS:
                       aggregations: min(ds)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string)
         Reducer 2
             Reduce Operator Tree:
               Join Operator
                 condition map:
-                     Left Semi Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
@@ -3637,8 +3580,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 5
             Reduce Operator Tree:
@@ -3646,40 +3589,46 @@ STAGE PLANS:
               Group By Operator
                 aggregations: max(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: _col0 is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-        Reducer 7
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+        Reducer 6
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+        Reducer 8
             Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: _col0 is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
   Stage: Stage-0
     Fetch Operator
@@ -3718,13 +3667,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 11 <- Map 10 (GROUP, 1)
-        Reducer 13 <- Map 12 (GROUP, 1)
-        Reducer 15 <- Map 10 (GROUP, 1)
-        Reducer 17 <- Map 12 (GROUP, 1)
+        Reducer 12 <- Map 11 (GROUP, 1)
+        Reducer 13 <- Reducer 12 (GROUP, 2), Reducer 15 (GROUP, 2)
+        Reducer 15 <- Map 14 (GROUP, 1)
+        Reducer 18 <- Reducer 12 (GROUP, 2), Reducer 15 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 10
+        Map 11
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -3737,12 +3686,12 @@ STAGE PLANS:
                       aggregations: max(ds)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string)
-        Map 12
+        Map 14
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -3755,142 +3704,102 @@ STAGE PLANS:
                       aggregations: min(ds)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string)
-        Reducer 11
+        Reducer 12
            Reduce Operator Tree:
              Group By Operator
                aggregations: max(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-               Filter Operator
-                 predicate: _col0 is not null (type: boolean)
-                 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                 Group By Operator
-                   keys: _col0 (type: string)
-                   mode: hash
-                   outputColumnNames: _col0
-                   Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     expressions: _col0 (type: string)
-                     outputColumnNames: _col0
-                     Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       keys: _col0 (type: string)
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                       Spark Partition Pruning Sink Operator
-                         partition key expr: ds
-                         Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                         target column name: ds
-                         target work: Map 1
+               Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+               Group By Operator
+                 keys: _col0 (type: string)
+                 mode: hash
+                 outputColumnNames: _col0
+                 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Map-reduce partition columns: _col0 (type: string)
+                   Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
         Reducer 13
            Reduce Operator Tree:
              Group By Operator
-               aggregations: min(VALUE._col0)
+               keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-               Filter Operator
-                 predicate: _col0 is not null (type: boolean)
-                 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+               Select Operator
+                 expressions: _col0 (type: string)
+                 outputColumnNames: _col0
+                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    keys: _col0 (type: string)
                    mode: hash
                    outputColumnNames: _col0
-                   Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     expressions: _col0 (type: string)
-                     outputColumnNames: _col0
-                     Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       keys: _col0 (type: string)
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                       Spark Partition Pruning Sink Operator
-                         partition key expr: ds
-                         Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                         target column name: ds
-                         target work: Map 1
+                   Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                   Spark Partition Pruning Sink Operator
+                     partition key expr: ds
+                     Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                     target column name: ds
+                     target work: Map 1
         Reducer 15
            Reduce Operator Tree:
              Group By Operator
-               aggregations: max(VALUE._col0)
+               aggregations: min(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-               Filter Operator
-                 predicate: _col0 is not null (type: boolean)
-                 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                 Group By Operator
-                   keys: _col0 (type: string)
-                   mode: hash
-                   outputColumnNames: _col0
-                   Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     expressions: _col0 (type: string)
-                     outputColumnNames: _col0
-                     Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       keys: _col0 (type: string)
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                       Spark Partition Pruning Sink Operator
-                         partition key expr: ds
-                         Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                         target column name: ds
-                         target work: Map 4
-        Reducer 17
+               Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+               Group By Operator
+                 keys: _col0 (type: string)
+                 mode: hash
+                 outputColumnNames: _col0
+                 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Map-reduce partition columns: _col0 (type: string)
+                   Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+        Reducer 18
            Reduce Operator Tree:
              Group By Operator
-               aggregations: min(VALUE._col0)
+               keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-               Filter Operator
-                 predicate: _col0 is not null (type: boolean)
-                 Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+               Select Operator
+                 expressions: _col0 (type: string)
+                 outputColumnNames: _col0
+                 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    keys: _col0 (type: string)
                    mode: hash
                    outputColumnNames: _col0
-                   Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     expressions: _col0 (type: string)
-                     outputColumnNames: _col0
-                     Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                     Group By Operator
-                       keys: _col0 (type: string)
-                       mode: hash
-                       outputColumnNames: _col0
-                       Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                       Spark Partition Pruning Sink Operator
-                         partition key expr: ds
-                         Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                         target column name: ds
-                         target work: Map 4
+                   Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                   Spark Partition Pruning Sink Operator
+                     partition key expr: ds
+                     Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                     target column name: ds
+                     target work: Map 4
   Stage: Stage-1
     Spark
       Edges:
+        Reducer 10 <- Map 9 (GROUP, 1)
         Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2)
         Reducer 7 <- Map 6 (GROUP, 1)
-        Reducer 9 <- Map 8 (GROUP, 1)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2)
+        Reducer 8 <- Reducer 10 (GROUP, 2), Reducer 7 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
                TableScan
                  alias: srcpart
-                 filterExpr: ds is not null (type: boolean)
                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    keys: ds (type: string)
@@ -3915,12 +3824,12 @@ STAGE PLANS:
                      aggregations: max(ds)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
-                       Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
-        Map 8
+        Map 9
            Map Operator Tree:
                TableScan
                  alias: srcpart
@@ -3933,11 +3842,28 @@ STAGE PLANS:
                      aggregations: min(ds)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
-                       Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
+        Reducer 10
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: min(VALUE._col0)
+               mode: mergepartial
+               outputColumnNames: _col0
+               Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+               Group By Operator
+                 keys: _col0 (type: string)
+                 mode: hash
+                 outputColumnNames: _col0
+                 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Map-reduce partition columns: _col0 (type: string)
+                   Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
         Reducer 2
            Reduce Operator Tree:
              Group By Operator
@@ -3954,7 +3880,7 @@ STAGE PLANS:
            Reduce Operator Tree:
              Join Operator
                condition map:
-                    Left Semi Join 0 to 1
+                    Inner Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
@@ -3964,8 +3890,8 @@ STAGE PLANS:
                  compressed: false
                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
                  table:
-                     input format: org.apache.hadoop.mapred.TextInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 7
            Reduce Operator Tree:
@@ -3973,40 +3899,29 @@ STAGE PLANS:
              aggregations: max(VALUE._col0)
              mode: mergepartial
              outputColumnNames: _col0
-             Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-             Filter Operator
-               predicate: _col0 is not null (type: boolean)
-               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 keys: _col0 (type: string)
-                 mode: hash
-                 outputColumnNames: _col0
-                 Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: _col0 (type: string)
-                   sort order: +
-                   Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-        Reducer 9
+             Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+             Group By Operator
+               keys: _col0 (type: string)
+               mode: hash
+               outputColumnNames: _col0
+               Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+               Reduce Output Operator
+                 key expressions: _col0 (type: string)
+                 sort order: +
+                 Map-reduce partition columns: _col0 (type: string)
+                 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+        Reducer 8
           Reduce Operator Tree:
            Group By Operator
-             aggregations: min(VALUE._col0)
+             keys: KEY._col0 (type: string)
              mode: mergepartial
             outputColumnNames: _col0
-             Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-             Filter Operator
-               predicate: _col0 is not null (type: boolean)
-               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 keys: _col0 (type: string)
-                 mode: hash
-                 outputColumnNames: _col0
-                 Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: _col0 (type: string)
-                   sort order: +
-                   Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+             Reduce Output Operator
+               key expressions: _col0 (type: string)
+               sort order: +
+               Map-reduce partition columns: _col0 (type: string)
+               Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
   Stage: Stage-0
     Fetch Operator
@@ -4035,10 +3950,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 2008-04-09
 2008-04-09
 PREHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- single column, single key
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4054,10 +3969,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -4131,8 +4046,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -4141,7 +4056,7 @@ STAGE PLANS:
     Processor Tree:
       ListSink
-PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -4150,7 +4065,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Input: default@srcpart_date
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -4163,23 +4078,19 @@ POSTHOOK: Input: default@srcpart_date
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: -- multiple sources, single key
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- multiple sources, single key
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4195,10 +4106,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -4228,10 +4139,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: string)
@@ -4314,8 +4225,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -4325,7 +4236,7 @@ STAGE PLANS:
       ListSink
 PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -4336,7 +4247,7 @@ PREHOOK: Input: default@srcpart_date
 PREHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -4350,19 +4261,17 @@ POSTHOOK: Input: default@srcpart_hour
 PREHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- multiple columns single source
-EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+EXPLAIN select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4378,10 +4287,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date_hour
-                  filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                   Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string), hr (type: string)
@@ -4468,8 +4377,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -4478,7 +4387,7 @@ STAGE PLANS:
     Processor Tree:
       ListSink
-PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+PREHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -4487,7 +4396,7 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 PREHOOK: Input: default@srcpart_date_hour
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.date = '2008-04-08' and srcpart_date_hour.hour = 11
+POSTHOOK: query: select count(*) from srcpart join srcpart_date_hour on (srcpart.ds = srcpart_date_hour.ds and srcpart.hr = srcpart_date_hour.hr) where srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -4500,19 +4409,17 @@ POSTHOOK: Input: default@srcpart_date_hour
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- empty set
-EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST'
+EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4528,10 +4435,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                  filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                    predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -4605,8 +4512,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -4616,13 +4523,13 @@ STAGE PLANS:
       ListSink
 PREHOOK: query: -- Disabled until TEZ-1486 is fixed
--- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 -- expressions
 EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Disabled until TEZ-1486 is fixed
--- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = 'I DONT EXIST';
+-- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = 'I DONT EXIST';
 -- expressions
 EXPLAIN select count(*) from srcpart join srcpart_double_hour on (srcpart.hr = cast(srcpart_double_hour.hr/2 as int)) where srcpart_double_hour.hour = 11
@@ -4641,10 +4548,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -4718,8 +4625,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -4765,10 +4672,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -4776,7 +4683,7 @@ STAGE PLANS:
                       Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                          0 (UDFToDouble(_col0) * 2.0) (type: double)
                           1 _col0 (type: double)
                     Select Operator
                       expressions: _col0 (type: double)
@@ -4815,7 +4722,7 @@ STAGE PLANS:
                       condition map:
                            Inner Join 0 to 1
                       keys:
-                        0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                        0 (UDFToDouble(_col0) * 2.0) (type: double)
                         1 _col0 (type: double)
                       input vertices:
                         1 Map 3
@@ -4842,8 +4749,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -4874,21 +4781,18 @@ POSTHOOK: Input: default@srcpart_double_hour
 PREHOOK: query: select count(*) from srcpart where hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
+Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- parent is reduce tasks
-EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -4909,46 +4813,32 @@ STAGE PLANS:
                   filterExpr: (ds = '2008-04-08') (type: boolean)
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: '2008-04-08' (type: string)
-                    outputColumnNames: ds
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      keys: ds (type: string)
+                      keys: '2008-04-08' (type: string)
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: string)
+                        key expressions: '2008-04-08' (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
+                        Map-reduce partition columns: '2008-04-08' (type: string)
                         Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
         Reducer 4
            Local Work:
              Map Reduce Local Work
            Reduce Operator Tree:
              Group By Operator
-               keys: KEY._col0 (type: string)
+               keys: '2008-04-08' (type: string)
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-               Spark HashTable Sink Operator
-                 keys:
-                   0 _col0 (type: string)
-                   1 _col0 (type: string)
                Select Operator
-                 expressions: _col0 (type: string)
-                 outputColumnNames: _col0
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                 Group By Operator
-                   keys: _col0 (type: string)
-                   mode: hash
-                   outputColumnNames: _col0
-                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                   Spark Partition Pruning Sink Operator
-                     partition key expr: ds
-                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                     target column name: ds
-                     target work: Map 1
+                 Spark HashTable Sink Operator
+                   keys:
+                     0 
+                     1 
   Stage: Stage-1
     Spark
@@ -4960,21 +4850,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart
-                  filterExpr: ds is not null (type: boolean)
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  filterExpr: (ds = '2008-04-08') (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Inner Join 0 to 1
                       keys:
-                        0 _col0 (type: string)
-                        1 _col0 (type: string)
+                        0 
+                        1 
                       input vertices:
                         1 Reducer 4
-                      Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -4997,8 +4885,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -5007,41 +4895,34 @@ STAGE PLANS:
     Processor Tree:
       ListSink
-PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
+Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
-POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
+POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- left join
-EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart left join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -5134,8 +5015,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -5144,9 +5025,9 @@ STAGE PLANS:
     Processor Tree:
      ListSink
-PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date left join srcpart on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -5257,8 +5138,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -5268,10 +5149,10 @@ STAGE PLANS:
       ListSink
 PREHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 POSTHOOK: query: -- full outer
-EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.date = '2008-04-08'
+EXPLAIN select count(*) from srcpart full outer join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -5382,8 +5263,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -5394,11 +5275,11 @@ STAGE PLANS:
 PREHOOK: query: -- with static pruning
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 PREHOOK: type: QUERY
 POSTHOOK: query: -- with static pruning
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
@@ -5414,10 +5295,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -5453,11 +5334,13 @@ STAGE PLANS:
                     predicate: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean)
                     Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
+                      expressions: hr (type: string)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 '11' (type: string)
-                          1 '11' (type: string)
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
            Local Work:
              Map Reduce Local Work
@@ -5473,8 +5356,8 @@ STAGE PLANS:
                   alias: srcpart
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: _col0
+                    expressions: ds (type: string), hr (type: string)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
@@ -5482,6 +5365,7 @@ STAGE PLANS:
                       keys:
                         0 _col0 (type: string)
                         1 _col0 (type: string)
+                      outputColumnNames: _col1
                       input vertices:
                         1 Map 3
                       Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
@@ -5489,8 +5373,8 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 '11' (type: string)
-                          1 '11' (type: string)
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
                         input vertices:
                           1 Map 4
                         Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE
@@ -5516,8 +5400,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -5527,7 +5411,7 @@ STAGE PLANS:
       ListSink
 PREHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -5536,7 +5420,7 @@ PREHOOK: Input: default@srcpart_date
 PREHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
+where srcpart_date.`date` = '2008-04-08' and srcpart_hour.hour = 11 and srcpart.hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
@@ -5546,10 +5430,10 @@ POSTHOOK: Input: default@srcpart_hour
 #### A masked pattern was here ####
 500
 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
-where srcpart_date.date = '2008-04-08' and srcpart.hr = 13
+where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
@@ -5566,14 +5450,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart
-                  filterExpr: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                     Select Operator
-                      expressions: ds (type: string)
-                      outputColumnNames: _col0
+                      expressions: ds (type: string), hr (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
@@ -5590,10 +5474,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 42 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 21 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -5605,13 +5489,14 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
+                        outputColumnNames: _col1
                         input vertices:
                           0 Map 1
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         Spark HashTable Sink Operator
                           keys:
-                            0 '13' (type: string)
-                            1 '13' (type: string)
+                            0 _col1 (type: string)
+                            1 _col0 (type: string)
            Local Work:
              Map Reduce Local Work
@@ -5631,13 +5516,15 @@ STAGE PLANS:
                     predicate: (UDFToDouble(hr) = 13.0) (type: boolean)
                     Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
+                      expressions: hr (type: string)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 '13' (type: string)
-                          1 '13' (type: string)
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
                         input vertices:
                           0 Map 2
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -5663,8 +5550,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
@@ -5675,14 +5562,14 @@ STAGE PLANS:
 PREHOOK: query: -- Disabled until TEZ-1486 is fixed
 -- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
--- where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+-- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 -- union + subquery
 EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
 PREHOOK: type: QUERY
 POSTHOOK: query: -- Disabled until TEZ-1486 is fixed
 -- select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) join srcpart_hour on (srcpart.hr = srcpart_hour.hr)
--- where srcpart_date.date = '2008-04-08' and srcpart.hr = 13;
+-- where srcpart_date.`date` = '2008-04-08' and srcpart.hr = 13;
 -- union + subquery
 EXPLAIN select distinct(ds) from srcpart where srcpart.ds in (select max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)
@@ -5696,11 +5583,12 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
      Edges:
-        Reducer 11 <- Map 10 (GROUP, 1)
-        Reducer 9 <- Map 8 (GROUP, 1)
+        Reducer 4 <- Map 3 (GROUP, 1)
+        Reducer 5 <- Reducer 4 (GROUP, 2), Reducer 7 (GROUP, 2)
+        Reducer 7 <- Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 10
+        Map 3
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -5710,15 +5598,15 @@ STAGE PLANS:
                     outputColumnNames: ds
                     Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: min(ds)
+                      aggregations: max(ds)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                        sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col0 (type: string)
-        Map 8
+        Map 6
            Map Operator Tree:
                TableScan
                  alias: srcpart
@@ -5728,155 +5616,115 @@ STAGE PLANS:
                    outputColumnNames: ds
                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                     aggregations: max(ds)
+                     aggregations: min(ds)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
-                       Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
-        Reducer 11
+        Reducer 4
           Reduce Operator Tree:
            Group By Operator
-             aggregations: min(VALUE._col0)
+             aggregations: max(VALUE._col0)
              mode: mergepartial
              outputColumnNames: _col0
-             Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-             Filter Operator
-               predicate: _col0 is not null (type: boolean)
-               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator
-                 keys: _col0 (type: string)
-                 mode: hash
-                 outputColumnNames: _col0
-                 Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                 Select Operator
-                   expressions: _col0 (type: string)
-                   outputColumnNames: _col0
-                   Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                   Group By Operator
-                     keys: _col0 (type: string)
-                     mode: hash
-                     outputColumnNames: _col0
-                     Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                     Spark Partition Pruning Sink Operator
-                       partition key expr: ds
-                       Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                       target column name: ds
-                       target work: Map 1
-        Reducer 9
+             Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+             Group By Operator
+               keys: _col0 (type: string)
+               mode: hash
+               outputColumnNames: _col0
+               Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+               Reduce Output Operator
+                 key expressions: _col0 (type: string)
+                 sort order: +
+                 Map-reduce partition columns: _col0 (type: string)
+                 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+        Reducer 5
+           Local Work:
+             Map Reduce Local Work
          Reduce Operator Tree:
            Group By Operator
-             aggregations: max(VALUE._col0)
+             keys: KEY._col0 (type: string)
              mode: mergepartial
             outputColumnNames: _col0
-             Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-             Filter Operator
-               predicate: _col0 is not null (type: boolean)
-               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+             Spark HashTable Sink Operator
+               keys:
+                 0 _col0 (type: string)
+                 1 _col0 (type: string)
+             Select Operator
+               expressions: _col0 (type: string)
+               outputColumnNames: _col0
+               Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 keys: _col0 (type: string)
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                    Spark Partition Pruning Sink Operator
-                      partition key expr: ds
-                      Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                      target column name: ds
-                      target work: Map 1
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Spark Partition Pruning Sink Operator
+                  partition key expr: ds
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                  target column name: ds
+                  target work: Map 1
+        Reducer 7
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: min(VALUE._col0)
+               mode: mergepartial
+               outputColumnNames: _col0
+               Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+               Group By Operator
+                 keys: _col0 (type: string)
+                 mode: hash
+                 outputColumnNames: _col0
+                 Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Map-reduce partition columns: _col0 (type: string)
+                   Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
   Stage: Stage-1
     Spark
      Edges:
-        Reducer 5 <- Map 4 (GROUP, 1)
-        Reducer 7 <- Map 6 (GROUP, 1)
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (GROUP, 2)
+        Reducer 2 <- Map 1 (GROUP, 2)
 #### A masked pattern was here ####
      Vertices:
        Map 1
           Map Operator Tree:
              TableScan
                alias: srcpart
-                filterExpr: ds is not null (type: boolean)
                Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: ds (type: string)
                  outputColumnNames: _col0
                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: srcpart
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: ds
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: max(ds)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string)
-        Map 6
-            Map Operator Tree:
-                TableScan
-                  alias: srcpart
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: ds
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: min(ds)
-                      mode: hash
+                  Map Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    keys:
+                      0 _col0 (type: string)
+                      1 _col0 (type: string)
                     outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string)
+                    input vertices:
+                      1 Reducer 5
+                    Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+            Local Work:
+              Map Reduce Local Work
        Reducer 2
           Reduce Operator Tree:
-            Join Operator
-              condition map:
-                   Left Semi Join 0 to 1
-              keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-              outputColumnNames: _col0
-              Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-              Group By Operator
-                keys: _col0 (type: string)
-                mode: hash
-                outputColumnNames: _col0
-                Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
-          Reduce Operator Tree:
            Group By Operator
              keys: KEY._col0 (type: string)
              mode: mergepartial
@@ -5886,49 +5734,9 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5
-          Reduce Operator Tree:
-            Group By Operator
-              aggregations: max(VALUE._col0)
-              mode: mergepartial
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator
-                predicate: _col0 is not null (type: boolean)
-                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-        Reducer 7
-          Reduce Operator Tree:
-            Group By Operator
-              aggregations: min(VALUE._col0)
-              mode: mergepartial
-              outputColumnNames: _col0
-              Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-              Filter Operator
-                predicate: _col0 is not null (type: boolean)
-                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
index 4e62a3b..b18fc3c 100644
--- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_2.q.out
@@ -165,29 +165,33 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: d1
-                  filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
+                  filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 dim_shops_id (type: int)
-                        1 id (type: int)
+                    predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: id (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
+                      expressions: id (type: int), label (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col1 (type: int)
+                          1 _col0 (type: int)
+                      Select Operator
+                        expressions: _col0 (type: int)
                         outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                        Spark Partition Pruning Sink Operator
-                          partition key expr: dim_shops_id
-                          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                          target column name: dim_shops_id
-                          target work: Map 1
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                          Spark Partition Pruning Sink Operator
+                            partition key expr: dim_shops_id
+                            Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                            target column name: dim_shops_id
+                            target work: Map 1
            Local Work:
              Map Reduce Local Work
@@ -204,34 +208,35 @@ STAGE PLANS:
                   alias: agg
                   filterExpr: dim_shops_id is not null (type: boolean)
                   Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Inner Join 0 to 1
-                    keys:
-                      0 dim_shops_id (type: int)
-                      1 id (type: int)
-                    outputColumnNames: _col0, _col1, _col5, _col6
-                    input vertices:
-                      1 Map 4
-                    Statistics: Num rows:
9 Data size: 29 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: amount (type: decimal(10,0)), dim_shops_id (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: string), _col0 (type: decimal(10,0)) - outputColumnNames: _col6, _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + expressions: _col3 (type: string), _col0 (type: decimal(10,0)) + outputColumnNames: _col3, _col0 + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col0) - keys: _col6 (type: string) + keys: _col3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) Local Work: Map Reduce Local Work @@ -242,24 +247,24 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -332,15 +337,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: d1 - filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) Statistics: Num rows: 3 Data size: 15 Basic stats: 
COMPLETE Column stats: NONE Filter Operator - predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 dim_shops_id (type: int) - 1 id (type: int) + predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), label (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -357,34 +366,35 @@ STAGE PLANS: alias: agg filterExpr: dim_shops_id is not null (type: boolean) Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 dim_shops_id (type: int) - 1 id (type: int) - outputColumnNames: _col0, _col1, _col5, _col6 - input vertices: - 1 Map 4 - Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: amount (type: decimal(10,0)), dim_shops_id (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col6 (type: string), _col0 (type: decimal(10,0)) - outputColumnNames: _col6, _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + expressions: _col3 (type: string), _col0 (type: decimal(10,0)) + outputColumnNames: _col3, _col0 + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col0) - keys: _col6 (type: string) + keys: _col3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) Local Work: Map Reduce Local Work @@ -395,24 +405,24 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) Reducer 3 Reduce Operator Tree: Select Operator 
                 expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0))
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -481,11 +491,15 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: id is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 dim_shops_id (type: int)
-                        1 id (type: int)
+                    Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: id (type: int), label (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
             Local Work:
              Map Reduce Local Work
@@ -499,29 +513,30 @@ STAGE PLANS:
                   alias: agg
                   filterExpr: dim_shops_id is not null (type: boolean)
                   Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Inner Join 0 to 1
-                    keys:
-                      0 dim_shops_id (type: int)
-                      1 id (type: int)
-                    outputColumnNames: _col1, _col5, _col6
-                    input vertices:
-                      1 Map 2
-                    Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
-                    Filter Operator
-                      predicate: (_col1 = _col5) (type: boolean)
-                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: dim_shops_id (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 _col0 (type: int)
+                        1 _col0 (type: int)
+                      outputColumnNames: _col2
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
-                        expressions: _col6 (type: string)
+                        expressions: _col2 (type: string)
                         outputColumnNames: _col0
-                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
                           table:
-                              input format: org.apache.hadoop.mapred.TextInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Local Work:
              Map Reduce Local Work
@@ -563,6 +578,7 @@ baz
 foo
 foo
 foo
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: EXPLAIN SELECT agg.amount
 FROM agg_01 agg,
 dim_shops d1
@@ -589,15 +605,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: d1
-                  filterExpr: (id = 1) (type: boolean)
+                  filterExpr: (1 = id) (type: boolean)
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (id = 1) (type: boolean)
+                    predicate: (1 = id) (type: boolean)
                     Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 1 (type: int)
-                        1 1 (type: int)
+                    Select Operator
+                      Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 
+                          1 
             Local Work:
              Map Reduce Local Work
@@ -611,23 +629,27 @@ STAGE PLANS:
                   alias: agg
                   filterExpr: (dim_shops_id = 1) (type: boolean)
                   Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Inner Join 0 to 1
-                    keys:
-                      0 1 (type: int)
-                      1 1 (type: int)
+                  Select Operator
+                    expressions: amount (type: decimal(10,0))
                     outputColumnNames: _col0
-                    input vertices:
-                      1 Map 2
                     Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 
+                        1 
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Local Work:
              Map Reduce Local Work
@@ -637,6 +659,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: SELECT agg.amount
 FROM agg_01 agg,
 dim_shops d1
@@ -692,29 +715,33 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: d1
-                  filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
+                  filterExpr: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 dim_shops_id (type: int)
-                        1 id (type: int)
+                    predicate: ((label) IN ('foo', 'bar') and id is not null) (type: boolean)
+                    Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: id (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
+                      expressions: id (type: int), label (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col1 (type: int)
+                          1 _col0 (type: int)
+                      Select Operator
+                        expressions: _col0 (type: int)
                         outputColumnNames: _col0
-                        Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                        Spark Partition Pruning Sink Operator
-                          partition key expr: dim_shops_id
-                          Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                          target column name: dim_shops_id
-                          target work: Map 1
+                        Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                          Spark Partition Pruning Sink Operator
+                            partition key expr: dim_shops_id
+                            Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                            target column name: dim_shops_id
+                            target work: Map 1
             Local Work:
              Map Reduce Local Work
@@ -731,34 +758,35 @@ STAGE PLANS:
                   alias: agg
                   filterExpr: dim_shops_id is not null (type: boolean)
                   Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Inner Join 0 to 1
-                    keys:
-                      0 dim_shops_id (type: int)
-                      1 id (type: int)
-                    outputColumnNames: _col0, _col1, _col5, _col6
-                    input vertices:
-                      1 Map 4
-                    Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
-                    Filter Operator
-                      predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean)
-                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: amount (type: decimal(10,0)), dim_shops_id (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 _col1 (type: int)
+                        1 _col0 (type: int)
+                      outputColumnNames: _col0, _col3
+                      input vertices:
+                        1 Map 4
+                      Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
-                        expressions: _col6 (type: string), _col0 (type: decimal(10,0))
-                        outputColumnNames: _col6, _col0
-                        Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                        expressions: _col3 (type: string), _col0 (type: decimal(10,0))
+                        outputColumnNames: _col3, _col0
+                        Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count(), sum(_col0)
-                          keys: _col6 (type: string)
+                          keys: _col3 (type: string)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
-                          Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
                           Reduce Output Operator
                             key expressions: _col0 (type: string)
                             sort order: +
                             Map-reduce partition columns: _col0 (type: string)
-                            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
                             value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
             Local Work:
              Map Reduce Local Work
@@ -769,24 +797,24 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col0 (type: string)
                   sort order: +
-                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0))
         Reducer 3 
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0))
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -852,29 +880,33 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: dim_shops
-                  filterExpr: (id is not null and (label = 'foo')) (type: boolean)
+                  filterExpr: ((label = 'foo') and id is not null) (type: boolean)
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (id is not null and (label = 'foo')) (type: boolean)
+                    predicate: ((label = 'foo') and id is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 dim_shops_id (type: int)
-                        1 id (type: int)
                     Select Operator
                       expressions: id (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col1 (type: int)
+                          1 _col0 (type: int)
+                      Select Operator
+                        expressions: _col0 (type: int)
                         outputColumnNames: _col0
                         Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                        Spark Partition Pruning Sink Operator
-                          partition key expr: dim_shops_id
+                        Group By Operator
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0
                           Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                          target column name: dim_shops_id
-                          target work: Map 1
+                          Spark Partition Pruning Sink Operator
+                            partition key expr: dim_shops_id
+                            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                            target column name: dim_shops_id
+                            target work: Map 1
             Local Work:
              Map Reduce Local Work
@@ -886,29 +918,33 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: dim_shops
-                  filterExpr: (id is not null and (label = 'bar')) (type: boolean)
+                  filterExpr: ((label = 'bar') and id is not null) (type: boolean)
                   Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (id is not null and (label = 'bar')) (type: boolean)
+                    predicate: ((label = 'bar') and id is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                    Spark HashTable Sink Operator
-                      keys:
-                        0 dim_shops_id (type: int)
-                        1 id (type: int)
                     Select Operator
                       expressions: id (type: int)
                       outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: int)
-                        mode: hash
+                      Spark HashTable Sink Operator
+                        keys:
+                          0 _col1 (type: int)
+                          1 _col0 (type: int)
+                      Select Operator
+                        expressions: _col0 (type: int)
                         outputColumnNames: _col0
                         Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                        Spark Partition Pruning Sink Operator
-                          partition key expr: dim_shops_id
+                        Group By Operator
+                          keys: _col0 (type: int)
+                          mode: hash
+                          outputColumnNames: _col0
                           Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
-                          target column name: dim_shops_id
-                          target work: Map 3
+                          Spark Partition Pruning Sink Operator
+                            partition key expr: dim_shops_id
+                            Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                            target column name: dim_shops_id
+                            target work: Map 3
             Local Work:
              Map Reduce Local Work
@@ -922,30 +958,27 @@ STAGE PLANS:
                   alias: agg_01
                   filterExpr: dim_shops_id is not null (type: boolean)
                   Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Inner Join 0 to 1
-                    keys:
-                      0 dim_shops_id (type: int)
-                      1 id (type: int)
-                    outputColumnNames: _col0, _col1, _col5
-                    input vertices:
-                      1 Map 2
-                    Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
-                    Filter Operator
-                      predicate: (_col1 = _col5) (type: boolean)
-                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: decimal(10,0))
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 8 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: org.apache.hadoop.mapred.TextInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  Select Operator
+                    expressions: amount (type: decimal(10,0)), dim_shops_id (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 _col1 (type: int)
+                        1 _col0 (type: int)
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 18 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Local Work:
               Map Reduce Local Work
         Map 3 
@@ -954,30 +987,27 @@ STAGE PLANS:
                   alias: agg_01
                   filterExpr: dim_shops_id is not null (type: boolean)
                   Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
-                  Map Join Operator
-                    condition map:
-                         Inner Join 0 to 1
-                    keys:
-                      0 dim_shops_id (type: int)
-                      1 id (type: int)
-                    outputColumnNames: _col0, _col1, _col5
-                    input vertices:
-                      1 Map 4
-                    Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
-                    Filter Operator
-                      predicate: (_col1 = _col5) (type: boolean)
-                      Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: decimal(10,0))
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                        File Output Operator
-                          compressed: false
-                          Statistics: Num rows: 8 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                          table:
-                              input format: org.apache.hadoop.mapred.TextInputFormat
-                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  Select Operator
+                    expressions: amount (type: decimal(10,0)), dim_shops_id (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 _col1 (type: int)
+                        1 _col0 (type: int)
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 4
+                      Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 18 Data size: 58 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             Local Work:
               Map Reduce Local Work
diff --git a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
index c8f6cd7..699fcc6 100644
--- a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
@@ -127,6 +127,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Lineage: srcpart_date.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
 PREHOOK: query: create table srcpart_hour stored as orc as select hr as hr, hr as hour from srcpart group by hr
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@srcpart
@@ -145,6 +147,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_hour
+POSTHOOK: Lineage: srcpart_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 PREHOOK: query: create table srcpart_date_hour stored as orc as select ds as ds, ds as `date`, hr as hr, hr as hour from srcpart group by ds, hr
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@srcpart
@@ -163,6 +167,10 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_date_hour
+POSTHOOK: Lineage: srcpart_date_hour.date SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.ds SIMPLE [(srcpart)srcpart.FieldSchema(name:ds, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_date_hour.hr SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 PREHOOK: query: create table srcpart_double_hour stored as orc as select (hr*2) as hr, hr as hour from srcpart group by hr
 PREHOOK: type: CREATETABLE_AS_SELECT
 PREHOOK: Input: default@srcpart
@@ -181,6 +189,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@srcpart_double_hour
+POSTHOOK: Lineage: srcpart_double_hour.hour SIMPLE [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
+POSTHOOK: Lineage: srcpart_double_hour.hr EXPRESSION [(srcpart)srcpart.FieldSchema(name:hr, type:string, comment:null), ]
 PREHOOK: query: -- single column, single key
 EXPLAIN select count(*) from srcpart join srcpart_date on (srcpart.ds = srcpart_date.ds) where srcpart_date.`date` = '2008-04-08'
 PREHOOK: type: QUERY
@@ -201,10 +211,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -252,10 +262,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -297,8 +307,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -361,10 +371,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -406,8 +416,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -438,14 +448,10 @@ POSTHOOK: Input: default@srcpart_date
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: -- multiple sources, single key
@@ -470,10 +476,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -498,10 +504,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: string)
@@ -550,10 +556,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -569,10 +575,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: string)
@@ -629,8 +635,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -701,10 +707,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -720,10 +726,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: string)
@@ -780,8 +786,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -816,12 +822,10 @@ POSTHOOK: Input: default@srcpart_hour
 PREHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- multiple columns single source
@@ -844,10 +848,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date_hour
-                  filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                   Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string), hr (type: string)
@@ -872,10 +876,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date_hour
-                  filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                   Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string), hr (type: string)
@@ -922,10 +926,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date_hour
-                  filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                   Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string), hr (type: string)
@@ -967,8 +971,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1031,10 +1035,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date_hour
-                  filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                   Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string), hr (type: string)
@@ -1076,8 +1080,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1108,12 +1112,10 @@ POSTHOOK: Input: default@srcpart_date_hour
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- empty set
@@ -1136,10 +1138,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                  filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                    predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -1187,10 +1189,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                  filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                    predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -1232,8 +1234,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1296,10 +1298,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                  filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                    predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -1341,8 +1343,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1399,10 +1401,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -1450,10 +1452,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -1495,8 +1497,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1542,10 +1544,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -1585,18 +1587,18 @@ STAGE PLANS:
                     outputColumnNames: _col0
                     Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                      key expressions: (UDFToDouble(_col0) * 2.0) (type: double)
                       sort order: +
-                      Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                      Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double)
                       Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
         Map 4 
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -1614,7 +1616,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                  0 (UDFToDouble(_col0) * 2.0) (type: double)
                   1 _col0 (type: double)
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
@@ -1638,8 +1640,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1702,10 +1704,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                      expressions: hr (type: double)
@@ -1747,8 +1749,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1803,18 +1805,18 @@ STAGE PLANS:
                     outputColumnNames: _col0
                     Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                      key expressions: (UDFToDouble(_col0) * 2.0) (type: double)
                       sort order: +
-                      Map-reduce partition columns: (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                      Map-reduce partition columns: (UDFToDouble(_col0) * 2.0) (type: double)
                       Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
         Map 4 
            Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -1832,7 +1834,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                  0 (UDFToDouble(_col0) * 2.0) (type: double)
                   1 _col0 (type: double)
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
@@ -1856,8 +1858,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -1888,14 +1890,10 @@ POSTHOOK: Input: default@srcpart_double_hour
 PREHOOK: query: select count(*) from srcpart where hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
 PREHOOK: query: EXPLAIN select count(*) from srcpart join srcpart_double_hour on (cast(srcpart.hr*2 as string) = cast(srcpart_double_hour.hr as string)) where srcpart_double_hour.hour = 11
@@ -1916,10 +1914,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -1959,18 +1957,18 @@ STAGE PLANS:
                     outputColumnNames: _col0
                     Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+                      key expressions: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
                       sort order: +
-                      Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+                      Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
                       Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
         Map 4 
            Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -1988,7 +1986,7 @@ STAGE PLANS:
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 UDFToString((UDFToDouble(_col0) * UDFToDouble(2))) (type: string)
+                  0 UDFToString((UDFToDouble(_col0) * 2.0)) (type: string)
                   1 UDFToString(_col0) (type: string)
                 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
@@ -2012,8 +2010,8 @@ STAGE PLANS:
                 compressed: false
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -2044,16 +2042,13 @@ POSTHOOK: Input: default@srcpart_double_hour
 PREHOOK: query: select count(*) from srcpart where cast(hr as string) = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where cast(hr as string) = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
+Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: -- parent is reduce tasks
 EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
@@ -2061,83 +2056,29 @@ POSTHOOK: query: -- parent is reduce tasks
 EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-2
-    Spark
-      Edges:
-        Reducer 7 <- Map 6 (GROUP, 2)
-#### A masked pattern was here ####
-      Vertices:
-        Map 6 
-            Map Operator Tree:
-                TableScan
-                  alias: srcpart
-                  filterExpr: (ds = '2008-04-08') (type: boolean)
-                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: '2008-04-08' (type: string)
-                    outputColumnNames: ds
-                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      keys: ds (type: string)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-        Reducer 7 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Spark Partition Pruning Sink Operator
-                      partition key expr: ds
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      target column name: ds
-                      target work: Map 1
-
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 5 <- Map 4 (GROUP, 2)
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + filterExpr: (ds = '2008-04-08') (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -2145,18 +2086,16 @@ STAGE PLANS: filterExpr: (ds = '2008-04-08') (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: '2008-04-08' (type: string) - outputColumnNames: ds Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: ds (type: string) + keys: '2008-04-08' (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: '2008-04-08' (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: '2008-04-08' (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -2164,9 +2103,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2188,22 +2127,22 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: '2008-04-08' (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2211,37 +2150,30 @@ STAGE PLANS: Processor 
Tree: ListSink +Warning: Shuffle Join JOIN[13][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 1000 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1000 -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: -- non-equi join EXPLAIN select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) PREHOOK: type: QUERY @@ -2300,12 +2232,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 743248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col0 = _col2) or (_col1 = _col4)) (type: boolean) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 743248 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 743248 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2327,8 +2259,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2337,7 +2269,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[7][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product 
PREHOOK: query: select count(*) from srcpart, srcpart_date_hour where (srcpart_date_hour.`date` = '2008-04-08' and srcpart_date_hour.hour = 11) and (srcpart.ds = srcpart_date_hour.ds or srcpart.hr = srcpart_date_hour.hr) PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -2500,8 +2432,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2645,8 +2577,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2768,8 +2700,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2893,8 +2825,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -2925,10 +2857,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -2964,22 +2896,23 @@ STAGE PLANS: alias: srcpart Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 + expressions: ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = 
'2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -3001,11 +2934,13 @@ STAGE PLANS: predicate: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: hr (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '11' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '11' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 @@ -3016,11 +2951,12 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '11' (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: '11' (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: @@ -3052,8 +2988,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -3104,29 +3040,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean) + filterExpr: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean) + predicate: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 + expressions: ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 5 Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE Filter Operator - 
predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -3148,11 +3085,13 @@ STAGE PLANS: predicate: (UDFToDouble(hr) = 13.0) (type: boolean) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Select Operator + expressions: hr (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: '13' (type: string) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: '13' (type: string) + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 @@ -3163,11 +3102,12 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: '13' (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: '13' (type: string) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reducer 3 Reduce Operator Tree: @@ -3199,8 +3139,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 @@ -3239,11 +3179,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 1) - Reducer 9 <- Map 8 (GROUP, 1) + Reducer 10 <- Map 9 (GROUP, 1) + Reducer 11 <- Reducer 10 (GROUP, 2), Reducer 13 (GROUP, 2) + Reducer 13 <- Map 12 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 12 Map Operator Tree: TableScan alias: srcpart @@ -3256,12 +3197,12 @@ STAGE PLANS: aggregations: min(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 8 + Map 9 Map Operator Tree: TableScan alias: srcpart @@ -3274,86 +3215,84 @@ STAGE PLANS: aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 10 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 
Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Reducer 9 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + target column name: ds + target work: Map 1 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition 
columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 Spark Edges: - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 6 <- Reducer 5 (GROUP, 2), Reducer 8 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -3377,12 +3316,12 @@ STAGE PLANS: aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 + Map 7 Map Operator Tree: TableScan alias: srcpart @@ -3395,16 +3334,16 @@ STAGE PLANS: aggregations: min(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3430,8 +3369,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized @@ -3440,41 +3379,48 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3512,11 +3458,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 1) - Reducer 9 <- Map 8 (GROUP, 1) + Reducer 10 <- Map 9 (GROUP, 1) + Reducer 11 <- Reducer 10 (GROUP, 2), Reducer 13 (GROUP, 2) + Reducer 13 <- Map 12 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 12 Map Operator Tree: TableScan alias: srcpart @@ -3529,12 +3476,12 @@ STAGE PLANS: aggregations: min(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 8 + Map 9 Map Operator Tree: TableScan alias: srcpart @@ -3547,86 +3494,84 @@ STAGE PLANS: aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 10 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 11 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 - Reducer 9 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + target column name: ds + target work: Map 1 + Reducer 13 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0) + aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition 
columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 Spark Edges: - Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 1) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 6 <- Reducer 5 (GROUP, 2), Reducer 8 (GROUP, 2) + Reducer 8 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -3650,12 +3595,12 @@ STAGE PLANS: aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 6 + Map 7 Map Operator Tree: TableScan alias: srcpart @@ -3668,16 +3613,16 @@ STAGE PLANS: aggregations: min(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -3705,8 +3650,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized @@ -3715,41 +3660,48 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3788,13 +3740,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 1) - Reducer 13 <- Map 12 (GROUP, 1) - Reducer 15 <- Map 10 (GROUP, 1) - Reducer 17 <- Map 12 (GROUP, 1) + Reducer 12 <- Map 11 (GROUP, 1) + Reducer 13 <- Reducer 12 (GROUP, 2), Reducer 15 (GROUP, 2) + Reducer 15 <- Map 14 (GROUP, 1) + Reducer 17 <- Map 11 (GROUP, 1) + Reducer 18 <- Reducer 17 (GROUP, 2), Reducer 20 (GROUP, 2) + Reducer 20 <- Map 14 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 11 Map Operator Tree: TableScan alias: srcpart @@ -3807,12 +3761,12 @@ STAGE PLANS: aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 12 + Map 14 Map Operator Tree: TableScan alias: srcpart @@ -3825,147 +3779,143 @@ STAGE PLANS: aggregations: min(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 
(type: string) - Reducer 11 + Reducer 12 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 13 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + target column name: ds + target work: Map 1 Reducer 15 Execution mode: vectorized Reduce Operator Tree: Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num 
rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 17 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 18 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - target column name: ds - target work: Map 4 - Reducer 17 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + partition key expr: ds + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + target column name: ds + target work: Map 4 + Reducer 20 Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Spark Partition Pruning Sink Operator - partition key expr: ds - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column 
stats: NONE - target column name: ds - target work: Map 4 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 Spark Edges: + Reducer 10 <- Map 9 (GROUP, 1) Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) Reducer 5 <- Map 1 (GROUP, 2) Reducer 7 <- Map 6 (GROUP, 1) - Reducer 9 <- Map 8 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Reducer 10 (GROUP, 2), Reducer 7 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: srcpart - filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: ds (type: string) @@ -3990,12 +3940,12 @@ STAGE PLANS: aggregations: max(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 8 + Map 9 Map Operator Tree: TableScan alias: srcpart @@ -4008,11 +3958,29 @@ STAGE PLANS: aggregations: min(ds) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 10 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 2 Execution mode: vectorized Reduce Operator Tree: @@ -4030,7 +3998,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -4040,8 +4008,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Execution mode: vectorized @@ -4063,41 +4031,30 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reducer 9 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reducer 8 Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4145,10 +4102,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date - filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean) + filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ds is not null and (date = '2008-04-08')) (type: boolean) + predicate: ((date = '2008-04-08') and ds is not null) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string) @@ -4224,8 +4181,8 @@ STAGE PLANS: compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -4256,14 +4213,10 @@ POSTHOOK: Input: default@srcpart_date
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart
 #### A masked pattern was here ####
 1000
 PREHOOK: query: -- multiple sources, single key
@@ -4288,10 +4241,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -4322,10 +4275,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 344 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: string)
@@ -4410,8 +4363,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -4446,12 +4399,10 @@ POSTHOOK: Input: default@srcpart_hour
 PREHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where hr = 11 and ds = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- multiple columns single source
@@ -4474,10 +4425,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date_hour
-                  filterExpr: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                   Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and hr is not null and (date = '2008-04-08') and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((date = '2008-04-08') and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string), hr (type: string)
@@ -4566,8 +4517,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -4598,12 +4549,10 @@ POSTHOOK: Input: default@srcpart_date_hour
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08' and hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 #### A masked pattern was here ####
 500
 PREHOOK: query: -- empty set
@@ -4626,10 +4575,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                  filterExpr: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = 'I DONT EXIST')) (type: boolean)
+                    predicate: ((date = 'I DONT EXIST') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -4705,8 +4654,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -4741,10 +4690,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -4820,8 +4769,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -4867,10 +4816,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_double_hour
-                  filterExpr: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (hr is not null and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hour) = 11.0) and hr is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: hr (type: double)
@@ -4878,7 +4827,7 @@ STAGE PLANS:
                       Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                          0 (UDFToDouble(_col0) * 2.0) (type: double)
                           1 _col0 (type: double)
                 Select Operator
                   expressions: _col0 (type: double)
@@ -4918,7 +4867,7 @@ STAGE PLANS:
                     condition map:
                          Inner Join 0 to 1
                     keys:
-                      0 (UDFToDouble(_col0) * UDFToDouble(2)) (type: double)
+                      0 (UDFToDouble(_col0) * 2.0) (type: double)
                       1 _col0 (type: double)
                     input vertices:
                       1 Map 3
@@ -4946,8 +4895,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -4978,16 +4927,13 @@ POSTHOOK: Input: default@srcpart_double_hour
 PREHOOK: query: select count(*) from srcpart where hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
+Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: -- parent is reduce tasks
 EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
@@ -5013,18 +4959,16 @@ STAGE PLANS:
                   filterExpr: (ds = '2008-04-08') (type: boolean)
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: '2008-04-08' (type: string)
-                    outputColumnNames: ds
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      keys: ds (type: string)
+                      keys: '2008-04-08' (type: string)
                       mode: hash
                       outputColumnNames: _col0
                       Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: string)
+                        key expressions: '2008-04-08' (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
+                        Map-reduce partition columns: '2008-04-08' (type: string)
                         Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
         Reducer 4 
            Execution mode: vectorized
@@ -5032,28 +4976,16 @@ STAGE PLANS:
               Map Reduce Local Work
             Reduce Operator Tree:
               Group By Operator
-                keys: KEY._col0 (type: string)
+                keys: '2008-04-08' (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Spark HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: string)
-                    1 _col0 (type: string)
                 Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Spark Partition Pruning Sink Operator
-                      partition key expr: ds
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      target column name: ds
-                      target work: Map 1
+                  Spark HashTable Sink Operator
+                    keys:
+                      0 
+                      1 
 
   Stage: Stage-1
     Spark
@@ -5065,21 +4997,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart
-                  filterExpr: ds is not null (type: boolean)
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  filterExpr: (ds = '2008-04-08') (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Inner Join 0 to 1
                       keys:
-                        0 _col0 (type: string)
-                        1 _col0 (type: string)
+                        0 
+                        1 
                       input vertices:
                         1 Reducer 4
-                      Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500000 Data size: 11124000 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -5103,8 +5033,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -5113,34 +5043,27 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: -- left join
@@ -5242,8 +5165,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -5368,8 +5291,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -5496,8 +5419,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -5528,10 +5451,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -5568,11 +5491,13 @@ STAGE PLANS:
                     predicate: ((UDFToDouble(hour) = 11.0) and (UDFToDouble(hr) = 11.0)) (type: boolean)
                     Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
+                      expressions: hr (type: string)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 '11' (type: string)
-                          1 '11' (type: string)
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
@@ -5589,8 +5514,8 @@ STAGE PLANS:
                   alias: srcpart
                   Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: _col0
+                    expressions: ds (type: string), hr (type: string)
+                    outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
@@ -5598,6 +5523,7 @@ STAGE PLANS:
                       keys:
                         0 _col0 (type: string)
                         1 _col0 (type: string)
+                      outputColumnNames: _col1
                       input vertices:
                         1 Map 3
                       Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
@@ -5605,8 +5531,8 @@ STAGE PLANS:
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 '11' (type: string)
-                          1 '11' (type: string)
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
                         input vertices:
                           1 Map 4
                         Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE
@@ -5633,8 +5559,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -5683,14 +5609,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart
-                  filterExpr: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean)
+                  filterExpr: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (UDFToDouble(hr) = 13.0)) (type: boolean)
+                    predicate: ((UDFToDouble(hr) = 13.0) and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                     Select Operator
-                      expressions: ds (type: string)
-                      outputColumnNames: _col0
+                      expressions: ds (type: string), hr (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
@@ -5708,10 +5634,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date
-                  filterExpr: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                  filterExpr: ((date = '2008-04-08') and ds is not null) (type: boolean)
                   Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and (date = '2008-04-08')) (type: boolean)
+                    predicate: ((date = '2008-04-08') and ds is not null) (type: boolean)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string)
@@ -5723,13 +5649,14 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: string)
                           1 _col0 (type: string)
+                        outputColumnNames: _col1
                         input vertices:
                           0 Map 1
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                         Spark HashTable Sink Operator
                           keys:
-                            0 '13' (type: string)
-                            1 '13' (type: string)
+                            0 _col1 (type: string)
+                            1 _col0 (type: string)
             Execution mode: vectorized
             Local Work:
               Map Reduce Local Work
@@ -5750,13 +5677,15 @@ STAGE PLANS:
                     predicate: (UDFToDouble(hr) = 13.0) (type: boolean)
                     Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
+                      expressions: hr (type: string)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 '13' (type: string)
-                          1 '13' (type: string)
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
                         input vertices:
                           0 Map 2
                         Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -5784,8 +5713,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -5817,11 +5746,12 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 11 <- Map 10 (GROUP, 1)
-        Reducer 9 <- Map 8 (GROUP, 1)
+        Reducer 4 <- Map 3 (GROUP, 1)
+        Reducer 5 <- Reducer 4 (GROUP, 2), Reducer 7 (GROUP, 2)
+        Reducer 7 <- Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 10 
+        Map 3 
            Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -5831,15 +5761,15 @@ STAGE PLANS:
                    outputColumnNames: ds
                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: min(ds)
+                      aggregations: max(ds)
                      mode: hash
                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
-        Map 8 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -5849,156 +5779,117 @@ STAGE PLANS:
                    outputColumnNames: ds
                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                    Group By Operator
-                      aggregations: max(ds)
+                      aggregations: min(ds)
                      mode: hash
                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: string)
-        Reducer 11 
+        Reducer 4 
            Execution mode: vectorized
            Reduce Operator Tree:
              Group By Operator
-                aggregations: min(VALUE._col0)
+                aggregations: max(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: _col0 is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: _col0 (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                        Spark Partition Pruning Sink Operator
-                          partition key expr: ds
-                          Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                          target column name: ds
-                          target work: Map 1
-        Reducer 9 
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+        Reducer 5 
            Execution mode: vectorized
+            Local Work:
+              Map Reduce Local Work
            Reduce Operator Tree:
              Group By Operator
-                aggregations: max(VALUE._col0)
+                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: _col0 is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Spark HashTable Sink Operator
+                  keys:
+                    0 _col0 (type: string)
+                    1 _col0 (type: string)
+                Select Operator
+                  expressions: _col0 (type: string)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
                  Group By Operator
                    keys: _col0 (type: string)
                    mode: hash
                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: _col0 (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                      Group By Operator
-                        keys: _col0 (type: string)
-                        mode: hash
-                        outputColumnNames: _col0
-                        Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                        Spark Partition Pruning Sink Operator
-                          partition key expr: ds
-                          Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                          target column name: ds
-                          target work: Map 1
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                    Spark Partition Pruning Sink Operator
+                      partition key expr: ds
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                      target column name: ds
+                      target work: Map 1
+        Reducer 7 
+            Execution mode: vectorized
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 5 <- Map 4 (GROUP, 1)
-        Reducer 7 <- Map 6 (GROUP, 1)
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (GROUP, 2)
+        Reducer 2 <- Map 1 (GROUP, 2)
 #### A masked pattern was here ####
      Vertices:
        Map 1 
            Map Operator Tree:
                TableScan
                  alias: srcpart
-                  filterExpr: ds is not null (type: boolean)
                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: ds (type: string)
                    outputColumnNames: _col0
                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: srcpart
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: ds
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: max(ds)
-                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string)
-        Map 6 
-            Map Operator Tree:
-                TableScan
-                  alias: srcpart
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: ds
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                    Group By Operator
-                      aggregations: min(ds)
-                      mode: hash
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 _col0 (type: string)
+                        1 _col0 (type: string)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        sort order: 
-                        Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string)
+                      input vertices:
+                        1 Reducer 5
+                      Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+            Local Work:
+              Map Reduce Local Work
        Reducer 2 
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Semi Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3 
            Execution mode: vectorized
            Reduce Operator Tree:
              Group By Operator
@@ -6010,51 +5901,9 @@ STAGE PLANS:
                compressed: false
                Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 5 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: max(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: _col0 is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-        Reducer 7 
-            Execution mode: vectorized
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0)
-                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                Filter Operator
-                  predicate: _col0 is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: NONE
 
   Stage: Stage-0
     Fetch Operator
@@ -6135,10 +5984,10 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date_hour
-                  filterExpr: (ds is not null and hr is not null and ((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                  filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                   Statistics: Num rows: 4 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (ds is not null and hr is not null and ((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0)) (type: boolean)
+                    predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0) and ds is not null and hr is not null) (type: boolean)
                     Statistics: Num rows: 2 Data size: 720 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ds (type: string), hr (type: string)
@@ -6225,8 +6074,8 @@ STAGE PLANS:
                   compressed: false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
@@ -6257,14 +6106,10 @@ POSTHOOK: Input: default@srcpart_orc@ds=2008-04-09/hr=12
 PREHOOK: query: select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
-PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart where (ds = '2008-04-08' or ds = '2008-04-09') and hr = 11
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
-POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
 PREHOOK: query: drop table srcpart_orc