diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
index 020d7f3..12bb1d7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
@@ -156,7 +156,7 @@ public boolean isHiddenVirtualCol() {
    */
   @Override
   public String toString() {
-    return internalName + ": " + objectInspector.getTypeName();
+    return internalName + ": " + typeName;
   }
 
   public void setAlias(String col_alias) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
index cc515c1..e1f74f8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
@@ -145,9 +145,9 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
       }
       int groupingSetPosition = conf.getGroupingSetPosition();
       if (groupingSetPosition >= 0) {
-        List<String> cols = cppCtx.genColLists(op);
+        List<String> neededCols = cppCtx.genColLists(op);
         String groupingColumn = conf.getOutputColumnNames().get(groupingSetPosition);
-        if (!cols.contains(groupingColumn)) {
+        if (!neededCols.contains(groupingColumn)) {
           conf.getOutputColumnNames().remove(groupingSetPosition);
           if (op.getSchema() != null) {
             op.getSchema().getSignature().remove(groupingSetPosition);
@@ -155,6 +155,36 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
         }
       }
 
+      // If a child expects a different schema, insert a Select (project) operator
+      // between the two, since the columns of the GroupBy operator itself cannot be pruned
+      for (Operator<? extends OperatorDesc> child : op.getChildOperators()) {
+        if (cppCtx.getPrunedColList(child).size() != op.getSchema().getSignature().size()
+            && !(child instanceof SelectOperator || child instanceof ReduceSinkOperator)) {
+          ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
+          ArrayList<String> outputs = new ArrayList<String>();
+          Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+          ArrayList<ColumnInfo> outputRS = new ArrayList<ColumnInfo>();
+          for (String internalName : cppCtx.getPrunedColList(child)) {
+            ColumnInfo colInfo = op.getSchema().getColumnInfo(internalName);
+            ExprNodeDesc colDesc = new ExprNodeColumnDesc(colInfo.getType(),
+                colInfo.getInternalName(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
+            exprs.add(colDesc);
+            outputs.add(colInfo.getInternalName());
+            ColumnInfo newCol = new ColumnInfo(colInfo.getInternalName(), colInfo.getType(),
+                colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+            newCol.setAlias(colInfo.getAlias());
+            outputRS.add(newCol);
+            colExprMap.put(colInfo.getInternalName(), colDesc);
+          }
+          SelectDesc select = new SelectDesc(exprs, outputs, false);
+          op.removeChild(child);
+          SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
+              select, new RowSchema(outputRS), op);
+          OperatorFactory.makeChild(sel, child);
+          sel.setColumnExprMap(colExprMap);
+        }
+      }
+
       cppCtx.getPrunedColLists().put(op, colLists);
       return null;
     }
diff --git ql/src/test/queries/clientpositive/join43.q ql/src/test/queries/clientpositive/join43.q
new file mode 100644
index 0000000..68694c6
--- /dev/null
+++ ql/src/test/queries/clientpositive/join43.q
@@ -0,0 +1,83 @@
+create table purchase_history (s string, product string, price double, time int);
+insert into purchase_history values ('1', 'Belt', 20.00, 21);
+insert into purchase_history values ('1', 'Socks', 3.50, 31);
+insert into purchase_history values ('3', 'Belt', 20.00, 51);
+insert into purchase_history values ('4', 'Shirt', 15.50, 59);
+
+create table cart_history (s string, cart_id int, time int);
+insert into cart_history values ('1', 1, 10);
+insert into cart_history values ('1', 2, 20);
+insert into cart_history values ('1', 3, 30);
+insert into cart_history values ('1', 4, 40);
+insert into cart_history values ('3', 5, 50);
+insert into cart_history values ('4', 6, 60);
+
+create table events (s string, st2 string, n int, time int);
+insert into events values ('1', 'Bob', 1234, 20);
+insert into events values ('1', 'Bob', 1234, 30);
+insert into events values ('1', 'Bob', 1234, 25);
+insert into events values ('2', 'Sam', 1234, 30);
+insert into events values ('3', 'Jeff', 1234, 50);
+insert into events values ('4', 'Ted', 1234, 60);
+
+explain
+select s
+from (
+  select last.*, action.st2, action.n
+  from (
+    select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+    from (select * from purchase_history) purchase
+    join (select * from cart_history) mevt
+    on purchase.s = mevt.s
+    where purchase.time > mevt.time
+    group by purchase.s, purchase.time
+  ) last
+  join (select * from events) action
+  on last.s = action.s and last.last_stage_time = action.time
+) list;
+
+select s
+from (
+  select last.*, action.st2, action.n
+  from (
+    select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+    from (select * from purchase_history) purchase
+    join (select * from cart_history) mevt
+    on purchase.s = mevt.s
+    where purchase.time > mevt.time
+    group by purchase.s, purchase.time
+  ) last
+  join (select * from events) action
+  on last.s = action.s and last.last_stage_time = action.time
+) list;
+
+explain
+select *
+from (
+  select last.*, action.st2, action.n
+  from (
+    select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+    from (select * from purchase_history) purchase
+    join (select * from cart_history) mevt
+    on purchase.s = mevt.s
+    where purchase.time > mevt.time
+    group by purchase.s, purchase.time
+  ) last
+  join (select * from events) action
+  on last.s = action.s and last.last_stage_time = action.time
+) list;
+
+select *
+from (
+  select last.*, action.st2, action.n
+  from (
+    select purchase.s, purchase.time, max (mevt.time) as last_stage_time
+    from (select * from purchase_history) purchase
+    join (select * from cart_history) mevt
+    on purchase.s = mevt.s
+    where purchase.time > mevt.time
+    group by purchase.s, purchase.time
+  ) last
+  join (select * from events) action
+  on last.s = action.s and last.last_stage_time = action.time
+) list;
diff --git ql/src/test/results/clientpositive/having.q.out ql/src/test/results/clientpositive/having.q.out
index 2ea8f42..8682a46 100644
--- ql/src/test/results/clientpositive/having.q.out
+++ ql/src/test/results/clientpositive/having.q.out
@@ -38,20 +38,24 @@ STAGE PLANS:
 mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 3) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 3) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + 
expressions: _col1 (type: bigint) + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/join43.q.out ql/src/test/results/clientpositive/join43.q.out new file mode 100644 index 0000000..f22a9b8 --- /dev/null +++ ql/src/test/results/clientpositive/join43.q.out @@ -0,0 +1,648 @@ +PREHOOK: query: create table purchase_history (s string, product string, price double, time int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@purchase_history +POSTHOOK: query: create table purchase_history (s string, product string, price double, time int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@purchase_history +PREHOOK: query: insert into purchase_history values ('1', 'Belt', 20.00, 21) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@purchase_history +POSTHOOK: query: insert into purchase_history values ('1', 'Belt', 20.00, 21) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@purchase_history +POSTHOOK: Lineage: purchase_history.price EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.product SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.time EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: insert into purchase_history values ('1', 'Socks', 3.50, 31) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@purchase_history +POSTHOOK: query: insert into purchase_history values ('1', 'Socks', 3.50, 31) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@purchase_history +POSTHOOK: Lineage: purchase_history.price EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.product SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.time EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: insert into purchase_history values ('3', 'Belt', 20.00, 51) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: 
default@purchase_history +POSTHOOK: query: insert into purchase_history values ('3', 'Belt', 20.00, 51) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@purchase_history +POSTHOOK: Lineage: purchase_history.price EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.product SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.s SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.time EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: insert into purchase_history values ('4', 'Shirt', 15.50, 59) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@purchase_history +POSTHOOK: query: insert into purchase_history values ('4', 'Shirt', 15.50, 59) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@purchase_history +POSTHOOK: Lineage: purchase_history.price EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.product SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.s SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: purchase_history.time EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: create table cart_history (s string, cart_id int, time int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cart_history +POSTHOOK: query: create table cart_history (s string, cart_id int, time int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cart_history +PREHOOK: query: insert into cart_history values ('1', 1, 10) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@cart_history +POSTHOOK: query: insert into cart_history values ('1', 1, 10) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@cart_history +POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into cart_history values ('1', 2, 20) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@cart_history +POSTHOOK: query: insert into cart_history values ('1', 2, 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@cart_history +POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: 
cart_history.s SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into cart_history values ('1', 3, 30) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@cart_history +POSTHOOK: query: insert into cart_history values ('1', 3, 30) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@cart_history +POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into cart_history values ('1', 4, 40) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@cart_history +POSTHOOK: query: insert into cart_history values ('1', 4, 40) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@cart_history +POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into cart_history values ('3', 5, 50) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@cart_history +POSTHOOK: query: insert into cart_history values ('3', 5, 50) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@cart_history +POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: insert into cart_history values ('4', 6, 60) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@cart_history +POSTHOOK: query: insert into cart_history values ('4', 6, 60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@cart_history +POSTHOOK: Lineage: cart_history.cart_id EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.s SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: cart_history.time EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: create table events (s 
string, st2 string, n int, time int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@events +POSTHOOK: query: create table events (s string, st2 string, n int, time int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@events +PREHOOK: query: insert into events values ('1', 'Bob', 1234, 20) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__11 +PREHOOK: Output: default@events +POSTHOOK: query: insert into events values ('1', 'Bob', 1234, 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__11 +POSTHOOK: Output: default@events +POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: insert into events values ('1', 'Bob', 1234, 30) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__12 +PREHOOK: Output: default@events +POSTHOOK: query: insert into events values ('1', 'Bob', 1234, 30) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__12 +POSTHOOK: Output: default@events +POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: insert into events values ('1', 'Bob', 1234, 25) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__13 +PREHOOK: Output: default@events +POSTHOOK: query: insert into events values ('1', 'Bob', 1234, 25) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__13 +POSTHOOK: Output: default@events +POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: insert into events values ('2', 'Sam', 1234, 30) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@events +POSTHOOK: query: insert into events values ('2', 'Sam', 1234, 30) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@events +POSTHOOK: Lineage: events.n EXPRESSION 
[(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: insert into events values ('3', 'Jeff', 1234, 50) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@events +POSTHOOK: query: insert into events values ('3', 'Jeff', 1234, 50) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@events +POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: insert into events values ('4', 'Ted', 1234, 60) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__16 +PREHOOK: Output: default@events +POSTHOOK: query: insert into events values ('4', 'Ted', 1234, 60) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__16 +POSTHOOK: Output: default@events +POSTHOOK: Lineage: events.n EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: events.s SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: events.st2 SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: events.time EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +PREHOOK: query: explain +select s +from ( + select last.*, action.st2, action.n + from ( + select purchase.s, purchase.time, max (mevt.time) as last_stage_time + from (select * from purchase_history) purchase + join (select * from cart_history) mevt + on purchase.s = mevt.s + where purchase.time > mevt.time + group by purchase.s, purchase.time + ) last + join (select * from events) action + on last.s = action.s and last.last_stage_time = action.time +) list +PREHOOK: type: QUERY +POSTHOOK: query: explain +select s +from ( + select last.*, action.st2, action.n + from ( + select purchase.s, purchase.time, max (mevt.time) as last_stage_time + from (select * from purchase_history) purchase + join (select * from cart_history) mevt + on purchase.s = mevt.s + where purchase.time > mevt.time + group by purchase.s, purchase.time + ) last + join (select * from events) action + on last.s = action.s and last.last_stage_time = action.time +) list +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE 
PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: purchase_history + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s is not null (type: boolean) + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), time (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + TableScan + alias: cart_history + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s is not null (type: boolean) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), time (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3, _col6 + Statistics: Num rows: 3 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col3 > _col6) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col6) + keys: _col0 (type: string), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: events + Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s is not null and time is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), time (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col3 (type: int) + Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col3 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s +from ( + select last.*, action.st2, action.n + from ( + select purchase.s, purchase.time, max (mevt.time) as last_stage_time + from (select * from purchase_history) purchase + join (select * from cart_history) mevt + on purchase.s = mevt.s + where purchase.time > mevt.time + group by purchase.s, purchase.time + ) last + join (select * from events) action + on last.s = action.s and last.last_stage_time = action.time +) list +PREHOOK: type: QUERY +PREHOOK: Input: default@cart_history +PREHOOK: Input: default@events +PREHOOK: Input: default@purchase_history +#### A masked pattern was here #### +POSTHOOK: query: select s +from ( + select last.*, action.st2, action.n + from ( + select purchase.s, purchase.time, max (mevt.time) as last_stage_time + from (select * from purchase_history) purchase + join (select * from cart_history) mevt + on purchase.s = mevt.s + where purchase.time > mevt.time + group by purchase.s, purchase.time + ) last + join (select * from events) action + on last.s = action.s and last.last_stage_time = action.time +) list +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cart_history +POSTHOOK: Input: default@events +POSTHOOK: Input: default@purchase_history +#### A masked pattern was here #### +1 +1 +3 +PREHOOK: query: explain +select * +from ( + select last.*, action.st2, action.n + from ( + select purchase.s, purchase.time, max (mevt.time) as last_stage_time + from (select * from purchase_history) purchase + join (select * from cart_history) mevt + on purchase.s = mevt.s + where purchase.time > mevt.time + group by purchase.s, purchase.time + ) last + join (select * from events) action + on last.s = action.s and last.last_stage_time = action.time +) list +PREHOOK: type: QUERY +POSTHOOK: query: explain +select 
* +from ( + select last.*, action.st2, action.n + from ( + select purchase.s, purchase.time, max (mevt.time) as last_stage_time + from (select * from purchase_history) purchase + join (select * from cart_history) mevt + on purchase.s = mevt.s + where purchase.time > mevt.time + group by purchase.s, purchase.time + ) last + join (select * from events) action + on last.s = action.s and last.last_stage_time = action.time +) list +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: purchase_history + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s is not null (type: boolean) + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), time (type: int) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int) + TableScan + alias: cart_history + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: s is not null (type: boolean) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), time (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3, _col6 + Statistics: Num rows: 3 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col3 > _col6) (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col6) + keys: _col0 (type: string), _col3 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 6 Basic 
stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + TableScan + alias: events + Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (s is not null and time is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s (type: string), st2 (type: string), n (type: int), time (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col3 (type: int) + Statistics: Num rows: 2 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col2 (type: int) + 1 _col0 (type: string), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from ( + select last.*, action.st2, action.n + from ( + select purchase.s, purchase.time, max (mevt.time) as last_stage_time + from (select * from purchase_history) purchase + join (select * from cart_history) mevt + on purchase.s = mevt.s + where purchase.time > mevt.time + group by purchase.s, purchase.time + ) last + join (select * from events) action + on last.s = action.s and last.last_stage_time = action.time +) list +PREHOOK: type: QUERY +PREHOOK: Input: default@cart_history +PREHOOK: Input: default@events +PREHOOK: Input: default@purchase_history +#### A masked pattern was here #### +POSTHOOK: query: select * +from ( + select last.*, action.st2, action.n + from ( + select purchase.s, purchase.time, max (mevt.time) as last_stage_time + from (select * from purchase_history) purchase + join (select * from cart_history) mevt + on purchase.s = mevt.s + where purchase.time > mevt.time + group by purchase.s, 
purchase.time + ) last + join (select * from events) action + on last.s = action.s and last.last_stage_time = action.time +) list +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cart_history +POSTHOOK: Input: default@events +POSTHOOK: Input: default@purchase_history +#### A masked pattern was here #### +1 21 20 Bob 1234 +1 31 30 Bob 1234 +3 51 50 Jeff 1234 diff --git ql/src/test/results/clientpositive/subquery_in_having.q.out ql/src/test/results/clientpositive/subquery_in_having.q.out index 9d8b132..8cabfa7 100644 --- ql/src/test/results/clientpositive/subquery_in_having.q.out +++ ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -169,24 +169,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -381,24 +385,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col2 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col2 is not null (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint), _col1 (type: string) - mode: hash + Select Operator + expressions: _col2 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: bigint), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -920,24 +928,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1016,24 +1028,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash + Select Operator + expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator diff --git 
ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out index c32504e..775f477 100644 --- ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out +++ ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out @@ -680,22 +680,26 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) > 600.0) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator + Select Operator + expressions: _col1 (type: double), _col2 (type: double) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) > 600.0) (type: boolean) Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-6 Map Reduce diff --git ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index 2256f6e..5d6d4a8 100644 --- ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -756,24 +756,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col2 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col2 is not null (type: boolean) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint), _col1 (type: string) - mode: hash + Select Operator + expressions: _col2 (type: bigint), _col0 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 
(type: bigint), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/explainuser_1.q.out ql/src/test/results/clientpositive/tez/explainuser_1.q.out index d39da20..37403c4 100644 --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -2743,7 +2743,7 @@ Stage-0 keys:_col0 (type: string), _col1 (type: bigint) outputColumnNames:["_col0","_col1","_col2"] Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_62] + Merge Join Operator [MERGEJOIN_63] | condition map:[{"":"Left Semi Join 0 to 1"},{"":"Left Semi Join 0 to 2"}] | keys:{"2":"_col0 (type: string)","1":"_col0 (type: string)","0":"_col0 (type: string)"} | outputColumnNames:["_col0","_col1"] @@ -2791,33 +2791,36 @@ Stage-0 | Statistics:Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE | Filter Operator [FIL_58] | predicate:(((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0) and ((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1))) (type: boolean) - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | Group By Operator [GBY_8] - | | aggregations:["sum(VALUE._col0)"] - | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) - | | outputColumnNames:["_col0","_col1","_col2","_col3"] - | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_7] - | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | sort order:+++ - | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col3 (type: bigint) - | Group By Operator [GBY_6] - | aggregations:["sum(_col1)"] - | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) - | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_62] + | outputColumnNames:["_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + | Group By Operator [GBY_8] + | | aggregations:["sum(VALUE._col0)"] + | | keys:KEY._col0 (type: float), KEY._col1 (type: int), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_7] + | key expressions:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | sort order:+++ | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_2] - | outputColumnNames:["_col0","_col1","_col2"] - | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_59] - | 
predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean) + | value expressions:_col3 (type: bigint) + | Group By Operator [GBY_6] + | aggregations:["sum(_col1)"] + | keys:_col0 (type: float), _col1 (type: int), _col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] - | alias:cbo_t1 - | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_59] + | predicate:((((((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and (c_float > 0.0)) and ((c_int >= 1) or (c_float >= 1.0))) and ((UDFToFloat(c_int) + c_float) >= 0.0)) and (UDFToDouble(key) > 0.0)) and key is not null) (type: boolean) + | Statistics:Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE |<-Reducer 9 [SIMPLE_EDGE] Reduce Output Operator [RS_39] key expressions:_col0 (type: string) @@ -3486,7 +3489,7 @@ Stage-0 compressed:false Statistics:Num rows: 34 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE table:{"serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"} - Merge Join Operator [MERGEJOIN_48] + Merge Join Operator [MERGEJOIN_49] | condition map:[{"":"Left Semi Join 0 to 1"}] | keys:{"1":"_col0 (type: bigint)","0":"_col2 (type: bigint)"} | outputColumnNames:["_col0","_col1","_col2"] @@ -3518,7 +3521,7 @@ Stage-0 | keys:_col0 (type: string), _col1 (type: string) | outputColumnNames:["_col0","_col1","_col2"] | Statistics:Num rows: 84 Data size: 15624 Basic stats: COMPLETE Column stats: COMPLETE - | Merge Join Operator [MERGEJOIN_47] + | Merge Join Operator [MERGEJOIN_48] | | condition map:[{"":"Left Semi Join 0 to 1"}] | | keys:{"1":"_col0 (type: string)","0":"_col0 (type: string)"} | | outputColumnNames:["_col0","_col1"] @@ -3573,33 +3576,36 @@ Stage-0 Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator [FIL_45] predicate:_col1 is not null (type: boolean) - Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_26] - | aggregations:["count(VALUE._col0)"] - | keys:KEY._col0 (type: string) - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 6 [SIMPLE_EDGE] - Reduce Output Operator [RS_25] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) - sort order:+ - Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col1 (type: bigint) - Group By Operator [GBY_24] - aggregations:["count()"] - keys:_col0 (type: string) - outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 69 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_47] + outputColumnNames:["_col1"] + Statistics:Num rows: 69 Data size: 552 
Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_26] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_25] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_22] - outputColumnNames:["_col0"] - Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_46] - predicate:(key > '9') (type: boolean) + value expressions:_col1 (type: bigint) + Group By Operator [GBY_24] + aggregations:["count()"] + keys:_col0 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 69 Data size: 6555 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_22] + outputColumnNames:["_col0"] Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_20] - alias:b - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_46] + predicate:(key > '9') (type: boolean) + Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_20] + alias:b + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select p_mfgr, p_name, avg(p_size) from part group by p_mfgr, p_name diff --git ql/src/test/results/clientpositive/tez/having.q.out ql/src/test/results/clientpositive/tez/having.q.out index 6fc60d5..80f02de 100644 --- ql/src/test/results/clientpositive/tez/having.q.out +++ ql/src/test/results/clientpositive/tez/having.q.out @@ -44,20 +44,24 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 3) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 3) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator