diff --git a/pom.xml b/pom.xml
index ecec5af..79123fc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -100,7 +100,8 @@
     3.4
     1.7.5
     0.8.0.RELEASE
-    1.0.0-incubating-SNAPSHOT
+
+    1.0.0-incubating
     3.2.6
     3.2.10
     3.2.9
@@ -170,6 +171,20 @@
+
+
+    calcite-rc
+    calcite-rc1 maven repository
+    https://repository.apache.org/content/repositories/orgapachecalcite-1003
+    default
+
+    true
+    warn
+
+
+    true
+
+
     datanucleus
     datanucleus maven repository
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 1a50a46..09cece0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -180,7 +180,7 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode) {
       }

       // We have valid pruning expressions, only retrieve qualifying partitions
-      ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true));
+      ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true, getRelOptSchema().getTypeFactory()));
       partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(),
           partitionCache);
     } catch (HiveException he) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
index cce65f1..446085e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
@@ -33,12 +33,15 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

 import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlTypeUtil;

 /*
  * convert a RexNode to an ExprNodeDesc
@@ -48,8 +51,9 @@
   RelDataType rType;
   String tabAlias;
   boolean partitioningExpr;
+  private final RelDataTypeFactory dTFactory;

-  public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr) {
+  public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr, RelDataTypeFactory dTFactory) {
     super(true);
     /*
      * hb: 6/25/14 for now we only support expressions that only contain
@@ -62,6 +66,7 @@ public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitionin
     this.tabAlias = tabAlias;
     this.rType = rType;
     this.partitioningExpr = partitioningExpr;
+    this.dTFactory = dTFactory;
   }

   @Override
@@ -85,9 +90,15 @@ public ExprNodeDesc visitCall(RexCall call) {
       args.add(operand.accept(this));
     }

-    // If Expr is flat (and[p,q,r,s] or[p,q,r,s]) then recursively build the
-    // exprnode
-    if (ASTConverter.isFlat(call)) {
+    // If Call is a redundant cast then bail out. Ex: cast(true)BOOLEAN
+    if (call.isA(SqlKind.CAST)
+        && (call.operands.size() == 1)
+        && SqlTypeUtil.equalSansNullability(dTFactory, call.getType(),
+            call.operands.get(0).getType())) {
+      return args.get(0);
+    } else if (ASTConverter.isFlat(call)) {
+      // If Expr is flat (and[p,q,r,s] or[p,q,r,s]) then recursively build the
+      // exprnode
       ArrayList tmpExprArgs = new ArrayList();
       tmpExprArgs.addAll(args.subList(0, 2));
       gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 3a613a2..60ca260 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -78,12 +78,14 @@
 import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexExecutorImpl;
 import org.apache.calcite.rex.RexFieldCollation;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexWindowBound;
 import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.schema.Schemas;
 import org.apache.calcite.sql.SqlAggFunction;
 import org.apache.calcite.sql.SqlCall;
 import org.apache.calcite.sql.SqlExplainLevel;
@@ -716,7 +718,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
         hepPgmBldr.addRuleInstance(ReduceExpressionsRule.JOIN_INSTANCE);
         hepPgmBldr.addRuleInstance(ReduceExpressionsRule.FILTER_INSTANCE);
         hepPgmBldr.addRuleInstance(ReduceExpressionsRule.PROJECT_INSTANCE);
-        hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE);
+        hepPgmBldr.addRuleInstance(ProjectRemoveRule.NAME_CALC_INSTANCE);
         hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE);

         hepPgm = hepPgmBldr.build();
@@ -790,7 +792,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
       RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY,
          HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY,
          RelFactories.DEFAULT_SEMI_JOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY,
-          HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
+          HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY, true);
       basePlan = fieldTrimmer.trim(basePlan);

       // 5. Rerun PPD through Project as column pruning would have introduced DT
@@ -833,6 +835,11 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
       basePlan.getCluster().setMetadataProvider(
           new CachingRelMetadataProvider(chainedProvider, planner));

+      // Executor is required for constant-reduction rules; see [CALCITE-566]
+      final RexExecutorImpl executor =
+          new RexExecutorImpl(Schemas.createDataContext(null));
+      basePlan.getCluster().getPlanner().setExecutor(executor);
+
       planner.setRoot(basePlan);
       optimizedRelNode = planner.findBestExp();
diff --git a/ql/src/test/results/clientpositive/ppd_union_view.q.out b/ql/src/test/results/clientpositive/ppd_union_view.q.out
index 5ed89ba..1aa03cc 100644
--- a/ql/src/test/results/clientpositive/ppd_union_view.q.out
+++ b/ql/src/test/results/clientpositive/ppd_union_view.q.out
@@ -471,8 +471,8 @@ STAGE PLANS:
           condition map:
                Inner Join 0 to 1
           keys:
-            0 _col0 (type: string), _col2 (type: string)
-            1 _col1 (type: string), _col2 (type: string)
+            0 _col0 (type: string)
+            1 _col1 (type: string)
           outputColumnNames: _col1, _col3
           Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
           Select Operator
diff --git a/ql/src/test/results/clientpositive/subquery_views.q.out b/ql/src/test/results/clientpositive/subquery_views.q.out
index af45948..18c19ed 100644
--- a/ql/src/test/results/clientpositive/subquery_views.q.out
+++ b/ql/src/test/results/clientpositive/subquery_views.q.out
@@ -97,10 +97,10 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((value > 'val_11') and (key is null or (value is null or key is null))) (type: boolean)
-              Statistics: Num rows: 249 Data size: 2645 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                Statistics: Num rows: 249 Data size: 2645 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count()
                   mode: hash
@@ -252,10 +252,10 @@ STAGE PLANS:
             alias: b
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((value > 'val_11') and (key is null or (value is null or key is null))) (type: boolean)
-              Statistics: Num rows: 249 Data size: 2645 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                Statistics: Num rows: 249 Data size: 2645 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count()
                   mode: hash
diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
index d6f180b..df262de 100644
--- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
@@ -1773,6 +1773,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: -- parent is reduce tasks
 EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 PREHOOK: type: QUERY
@@ -1796,17 +1797,13 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: srcpart
-                  filterExpr: ds is not null (type: boolean)
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  filterExpr: (ds = '2008-04-08') (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                      sort order:
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
         Map 4
            Map Operator Tree:
                TableScan
@@ -1833,9 +1830,9 @@
               condition map:
                    Inner Join 0 to 1
               keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-              Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                0
+                1
+              Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count()
                 mode: hash
@@ -1866,26 +1863,11 @@
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order:
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Dynamic Partitioning Event Operator
-                      Target Input: srcpart
-                      Partition key expr: ds
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      Target column: ds
-                      Target Vertex: Map 1

   Stage: Stage-0
     Fetch Operator
@@ -1893,21 +1875,18 @@
       Processor Tree:
         ListSink

+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
@@ -4082,6 +4061,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
+Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: -- parent is reduce tasks
 EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 PREHOOK: type: QUERY
@@ -4105,21 +4085,19 @@
             Map Operator Tree:
                 TableScan
                   alias: srcpart
-                  filterExpr: ds is not null (type: boolean)
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  filterExpr: (ds = '2008-04-08') (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Inner Join 0 to 1
                       keys:
-                        0 _col0 (type: string)
-                        1 _col0 (type: string)
+                        0
+                        1
                       input vertices:
                         1 Reducer 4
-                      Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -4170,26 +4148,11 @@
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order:
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Dynamic Partitioning Event Operator
-                      Target Input: srcpart
-                      Partition key expr: ds
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      Target column: ds
-                      Target Vertex: Map 1

   Stage: Stage-0
     Fetch Operator
@@ -4197,21 +4160,18 @@
       Processor Tree:
         ListSink

+Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
diff --git a/ql/src/test/results/clientpositive/tez/mrr.q.out b/ql/src/test/results/clientpositive/tez/mrr.q.out
index 7a915a2..e7a3b3c 100644
--- a/ql/src/test/results/clientpositive/tez/mrr.q.out
+++ b/ql/src/test/results/clientpositive/tez/mrr.q.out
@@ -1418,18 +1418,18 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col3, _col4, _col5
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint), _col4 (type: string), _col5 (type: bigint)
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: bigint), _col4 (type: string), _col5 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col3, _col4, _col5
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
+                    value expressions: _col1 (type: bigint), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
         Reducer 5
             Reduce Operator Tree:
               Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: bigint), VALUE._col3 (type: string), VALUE._col4 (type: bigint)
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
@@ -1694,14 +1694,14 @@
                       1 Reducer 4
                       Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
-                        expressions: _col0 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: string)
-                        outputColumnNames: _col0, _col1, _col2, _col3
+                        expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: string)
+                        outputColumnNames: _col0, _col1, _col3
                         Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: string)
                           sort order: +
                           Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
+                          value expressions: _col1 (type: bigint), _col3 (type: string)
         Map 3
            Map Operator Tree:
                TableScan
@@ -1729,7 +1729,7 @@
        Reducer 2
            Reduce Operator Tree:
              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: string)
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
index ed12948..224a9db 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
@@ -1793,6 +1793,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: -- parent is reduce tasks
 EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 PREHOOK: type: QUERY
@@ -1816,17 +1817,13 @@
             Map Operator Tree:
                 TableScan
                   alias: srcpart
-                  filterExpr: ds is not null (type: boolean)
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  filterExpr: (ds = '2008-04-08') (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                      sort order:
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
         Map 4
            Map Operator Tree:
                TableScan
@@ -1853,9 +1850,9 @@
               condition map:
                    Inner Join 0 to 1
               keys:
-                0 _col0 (type: string)
-                1 _col0 (type: string)
-              Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                0
+                1
+              Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: count()
                 mode: hash
@@ -1887,26 +1884,12 @@
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order:
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Dynamic Partitioning Event Operator
-                      Target Input: srcpart
-                      Partition key expr: ds
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      Target column: ds
-                      Target Vertex: Map 1
+            Execution mode: vectorized

   Stage: Stage-0
     Fetch Operator
@@ -1914,21 +1897,18 @@
       Processor Tree:
        ListSink

+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
@@ -4128,6 +4108,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 #### A masked pattern was here ####
 1000
+Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: -- parent is reduce tasks
 EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 PREHOOK: type: QUERY
@@ -4151,21 +4132,19 @@
             Map Operator Tree:
                 TableScan
                   alias: srcpart
-                  filterExpr: ds is not null (type: boolean)
-                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  filterExpr: (ds = '2008-04-08') (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: ds (type: string)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Inner Join 0 to 1
                       keys:
-                        0 _col0 (type: string)
-                        1 _col0 (type: string)
+                        0
+                        1
                       input vertices:
                         1 Reducer 4
-                      Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -4217,26 +4196,12 @@
                 mode: mergepartial
                 outputColumnNames: _col0
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string)
-                  outputColumnNames: _col0
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    keys: _col0 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order:
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Dynamic Partitioning Event Operator
-                      Target Input: srcpart
-                      Partition key expr: ds
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      Target column: ds
-                      Target Vertex: Map 1
+            Execution mode: vectorized

   Stage: Stage-0
     Fetch Operator
@@ -4244,21 +4209,18 @@
      Processor Tree:
        ListSink

+Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 1000
 PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
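
Note on the ExprNodeConverter hunk above: the new branch skips a CAST that only changes nullability, returning the already-converted operand instead of building an ExprNodeGenericFuncDesc for a no-op cast. Below is a minimal, hypothetical sketch of that test in isolation; the helper class and method name are illustrative and not part of this patch, and the Calcite calls simply mirror the ones the hunk itself uses.

import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.type.SqlTypeUtil;

/** Illustrative helper only; assumes the Calcite 1.0.0-incubating APIs referenced in the patch. */
public final class RedundantCastCheck {

  private RedundantCastCheck() {
  }

  /**
   * Returns true when the call is a single-operand CAST whose target type equals the
   * operand's type up to nullability, i.e. a cast the converter can drop by returning
   * the operand's already-converted ExprNodeDesc.
   */
  public static boolean isRedundantCast(RexCall call, RelDataTypeFactory typeFactory) {
    return call.isA(SqlKind.CAST)
        && call.getOperands().size() == 1
        && SqlTypeUtil.equalSansNullability(typeFactory, call.getType(),
            call.getOperands().get(0).getType());
  }
}

This is presumably also why RelOptHiveTable.computePartitionList now hands the converter a RelDataTypeFactory: the pruning expression passed to PartitionPruner.prune is built through the same visitCall path, so the nullability-only casts never reach the partition pruner.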