diff --git a/pom.xml b/pom.xml
index ecec5af..79123fc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -100,7 +100,8 @@
3.4
1.7.5
0.8.0.RELEASE
- 1.0.0-incubating-SNAPSHOT
+
+ 1.0.0-incubating
3.2.6
3.2.10
3.2.9
@@ -170,6 +171,20 @@
+
+
+ calcite-rc
+ calcite-rc1 maven repository
+ https://repository.apache.org/content/repositories/orgapachecalcite-1003
+ default
+
+ true
+ warn
+
+
+ true
+
+
datanucleus
datanucleus maven repository
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 1a50a46..09cece0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -180,7 +180,7 @@ public void computePartitionList(HiveConf conf, RexNode pruneNode) {
}
// We have valid pruning expressions, only retrieve qualifying partitions
- ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true));
+ ExprNodeDesc pruneExpr = pruneNode.accept(new ExprNodeConverter(getName(), getRowType(), true, getRelOptSchema().getTypeFactory()));
partitionList = PartitionPruner.prune(hiveTblMetadata, pruneExpr, conf, getName(), partitionCache);
} catch (HiveException he) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
index cce65f1..446085e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
@@ -33,12 +33,15 @@
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlTypeUtil;
/*
* convert a RexNode to an ExprNodeDesc
@@ -48,8 +51,9 @@
RelDataType rType;
String tabAlias;
boolean partitioningExpr;
+ private final RelDataTypeFactory dTFactory;
- public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr) {
+ public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr, RelDataTypeFactory dTFactory) {
super(true);
/*
* hb: 6/25/14 for now we only support expressions that only contain
@@ -62,6 +66,7 @@ public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitionin
this.tabAlias = tabAlias;
this.rType = rType;
this.partitioningExpr = partitioningExpr;
+ this.dTFactory = dTFactory;
}
@Override
@@ -85,9 +90,15 @@ public ExprNodeDesc visitCall(RexCall call) {
args.add(operand.accept(this));
}
- // If Expr is flat (and[p,q,r,s] or[p,q,r,s]) then recursively build the
- // exprnode
- if (ASTConverter.isFlat(call)) {
+ // If Call is a redundant cast then bail out. Ex: cast(true)BOOLEAN
+ if (call.isA(SqlKind.CAST)
+ && (call.operands.size() == 1)
+ && SqlTypeUtil.equalSansNullability(dTFactory, call.getType(),
+ call.operands.get(0).getType())) {
+ return args.get(0);
+ } else if (ASTConverter.isFlat(call)) {
+ // If Expr is flat (and[p,q,r,s] or[p,q,r,s]) then recursively build the
+ // exprnode
ArrayList tmpExprArgs = new ArrayList();
tmpExprArgs.addAll(args.subList(0, 2));
gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 3a613a2..193fb78 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -78,12 +78,14 @@
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexExecutorImpl;
import org.apache.calcite.rex.RexFieldCollation;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexWindowBound;
import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.schema.Schemas;
import org.apache.calcite.sql.SqlAggFunction;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlExplainLevel;
@@ -833,6 +835,11 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges,
basePlan.getCluster().setMetadataProvider(
new CachingRelMetadataProvider(chainedProvider, planner));
+ // Executor is required for constant-reduction rules; see [CALCITE-566]
+ final RexExecutorImpl executor =
+ new RexExecutorImpl(Schemas.createDataContext(null));
+ basePlan.getCluster().getPlanner().setExecutor(executor);
+
planner.setRoot(basePlan);
optimizedRelNode = planner.findBestExp();
diff --git a/ql/src/test/results/clientpositive/ppd_union_view.q.out b/ql/src/test/results/clientpositive/ppd_union_view.q.out
index 5ed89ba..1aa03cc 100644
--- a/ql/src/test/results/clientpositive/ppd_union_view.q.out
+++ b/ql/src/test/results/clientpositive/ppd_union_view.q.out
@@ -471,8 +471,8 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string), _col2 (type: string)
- 1 _col1 (type: string), _col2 (type: string)
+ 0 _col0 (type: string)
+ 1 _col1 (type: string)
outputColumnNames: _col1, _col3
Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
Select Operator
diff --git a/ql/src/test/results/clientpositive/subquery_views.q.out b/ql/src/test/results/clientpositive/subquery_views.q.out
index af45948..18c19ed 100644
--- a/ql/src/test/results/clientpositive/subquery_views.q.out
+++ b/ql/src/test/results/clientpositive/subquery_views.q.out
@@ -97,10 +97,10 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value > 'val_11') and (key is null or (value is null or key is null))) (type: boolean)
- Statistics: Num rows: 249 Data size: 2645 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 249 Data size: 2645 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -252,10 +252,10 @@ STAGE PLANS:
alias: b
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: ((value > 'val_11') and (key is null or (value is null or key is null))) (type: boolean)
- Statistics: Num rows: 249 Data size: 2645 Basic stats: COMPLETE Column stats: NONE
+ predicate: ((value > 'val_11') and (key is null or value is null)) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 249 Data size: 2645 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
index d6f180b..df262de 100644
--- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out
@@ -1773,6 +1773,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
1000
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: -- parent is reduce tasks
EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
PREHOOK: type: QUERY
@@ -1796,17 +1797,13 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ filterExpr: (ds = '2008-04-08') (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ sort order:
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
@@ -1833,9 +1830,9 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ 0
+ 1
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -1866,26 +1863,11 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Dynamic Partitioning Event Operator
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Target column: ds
- Target Vertex: Map 1
Stage: Stage-0
Fetch Operator
@@ -1893,21 +1875,18 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
1000
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
@@ -4082,6 +4061,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
1000
+Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: -- parent is reduce tasks
EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
PREHOOK: type: QUERY
@@ -4105,21 +4085,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ filterExpr: (ds = '2008-04-08') (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
+ 0
+ 1
input vertices:
1 Reducer 4
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -4170,26 +4148,11 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Dynamic Partitioning Event Operator
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Target column: ds
- Target Vertex: Map 1
Stage: Stage-0
Fetch Operator
@@ -4197,21 +4160,18 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
1000
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
diff --git a/ql/src/test/results/clientpositive/tez/mrr.q.out b/ql/src/test/results/clientpositive/tez/mrr.q.out
index 7a915a2..e7a3b3c 100644
--- a/ql/src/test/results/clientpositive/tez/mrr.q.out
+++ b/ql/src/test/results/clientpositive/tez/mrr.q.out
@@ -1418,18 +1418,18 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: bigint), _col4 (type: string), _col5 (type: bigint)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: bigint), _col4 (type: string), _col5 (type: bigint)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
+ value expressions: _col1 (type: bigint), _col3 (type: bigint), _col4 (type: string), _col5 (type: bigint)
Reducer 5
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: bigint), VALUE._col3 (type: string), VALUE._col4 (type: bigint)
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1694,14 +1694,14 @@ STAGE PLANS:
1 Reducer 4
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col3 (type: bigint), _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: _col0 (type: string), _col3 (type: bigint), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
+ value expressions: _col1 (type: bigint), _col3 (type: string)
Map 3
Map Operator Tree:
TableScan
@@ -1729,7 +1729,7 @@ STAGE PLANS:
Reducer 2
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: string)
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
index ed12948..224a9db 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out
@@ -1793,6 +1793,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
1000
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: -- parent is reduce tasks
EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
PREHOOK: type: QUERY
@@ -1816,17 +1817,13 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ filterExpr: (ds = '2008-04-08') (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ sort order:
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Map 4
Map Operator Tree:
TableScan
@@ -1853,9 +1850,9 @@ STAGE PLANS:
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ 0
+ 1
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -1887,26 +1884,12 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Dynamic Partitioning Event Operator
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Target column: ds
- Target Vertex: Map 1
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
@@ -1914,21 +1897,18 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
1000
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'
@@ -4128,6 +4108,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
#### A masked pattern was here ####
1000
+Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: -- parent is reduce tasks
EXPLAIN select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
PREHOOK: type: QUERY
@@ -4151,21 +4132,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: srcpart
- filterExpr: ds is not null (type: boolean)
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ filterExpr: (ds = '2008-04-08') (type: boolean)
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: ds (type: string)
- outputColumnNames: _col0
- Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: string)
- 1 _col0 (type: string)
+ 0
+ 1
input vertices:
1 Reducer 4
- Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -4217,26 +4196,12 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0
+ Reduce Output Operator
+ sort order:
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Dynamic Partitioning Event Operator
- Target Input: srcpart
- Partition key expr: ds
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Target column: ds
- Target Vertex: Map 1
+ Execution mode: vectorized
Stage: Stage-0
Fetch Operator
@@ -4244,21 +4209,18 @@ STAGE PLANS:
Processor Tree:
ListSink
+Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Map 1' is a cross product
PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select count(*) from srcpart join (select ds as ds, ds as date from srcpart group by ds) s on (srcpart.ds = s.ds) where s.date = '2008-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
-POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
1000
PREHOOK: query: select count(*) from srcpart where ds = '2008-04-08'