diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java index 03194a4..609ba7f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java @@ -322,13 +322,12 @@ protected void initializeOp(Configuration hconf) throws HiveException { transient boolean newGroupStarted = false; @Override - public void startGroup() throws HiveException { + public void startGroupOp() throws HiveException { LOG.trace("Join: Starting new group"); newGroupStarted = true; for (AbstractRowContainer> alw : storage) { alw.clearRows(); } - super.startGroup(); } protected long getNextSize(long sz) { @@ -631,7 +630,7 @@ protected final short getFilterTag(List row) { * @throws HiveException */ @Override - public void endGroup() throws HiveException { + public void endGroupOp() throws HiveException { LOG.trace("Join Op: endGroup called: numValues=" + numAliases); checkAndGenObject(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java index 772dda6..dc4a506 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DemuxOperator.java @@ -85,9 +85,6 @@ private int childrenDone; - // The index of the child which the last row was forwarded to in a key group. - private int lastChildIndex; - // Since DemuxOperator may appear multiple times in MuxOperator's parents list. // We use newChildIndexTag instead of childOperatorsTag. // Example: @@ -247,13 +244,6 @@ protected void initializeChildren(Configuration hconf) throws HiveException { public void processOp(Object row, int tag) throws HiveException { int currentChildIndex = newTagToChildIndex[tag]; - // Check if we start to forward rows to a new child. - // If so, in the current key group, rows will not be forwarded - // to those children which have an index less than the currentChildIndex. - // We can call flush the buffer of children from lastChildIndex (inclusive) - // to currentChildIndex (exclusive) and propagate processGroup to those children. - endGroupIfNecessary(currentChildIndex); - int oldTag = newTagToOldTag[tag]; if (isLogInfoEnabled) { cntrs[tag]++; @@ -296,52 +286,14 @@ protected void closeOp(boolean abort) throws HiveException { } } - /** - * We assume that the input rows associated with the same key are ordered by - * the tag. Because a tag maps to a childindex, when we see a new childIndex, - * we will not see the last childIndex (lastChildIndex) again before we start - * a new key group. So, we can call flush the buffer of children - * from lastChildIndex (inclusive) to currentChildIndex (exclusive) and - * propagate processGroup to those children. - * @param currentChildIndex the childIndex we have right now. - * @throws HiveException - */ - private void endGroupIfNecessary(int currentChildIndex) throws HiveException { - if (lastChildIndex != currentChildIndex) { - for (int i = lastChildIndex; i < currentChildIndex; i++) { - Operator child = childOperatorsArray[i]; - child.flush(); - child.endGroup(); - for (int childTag: newChildOperatorsTag[i]) { - child.processGroup(childTag); - } - } - lastChildIndex = currentChildIndex; - } - } - @Override - public void startGroup() throws HiveException { - lastChildIndex = 0; - super.startGroup(); - } - - @Override - public void endGroup() throws HiveException { - if (childOperators == null) { - return; - } - + public void endGroupOp() throws HiveException { // We will start a new key group. We can call flush the buffer // of children from lastChildIndex (inclusive) to the last child and // propagate processGroup to those children. - for (int i = lastChildIndex; i < childOperatorsArray.length; i++) { - Operator child = childOperatorsArray[i]; - child.flush(); + for (Operator child : childOperatorsArray) { child.endGroup(); - for (int childTag: newChildOperatorsTag[i]) { - child.processGroup(childTag); - } + child.flush(); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 1dde78e..fe1c11c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -811,7 +811,7 @@ private String getDynPartDirectory(List row, List dpColNames, in } @Override - public void startGroup() throws HiveException { + public void startGroupOp() throws HiveException { if (!conf.getDpSortState().equals(DPSortState.NONE)) { keyOI = getGroupKeyObjectInspector(); keys.clear(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index 792d87f..2903af8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -716,13 +716,12 @@ protected void updateAggregations(AggregationBuffer[] aggs, Object row, } @Override - public void startGroup() throws HiveException { + public void startGroupOp() throws HiveException { firstRowInGroup = true; - super.startGroup(); } @Override - public void endGroup() throws HiveException { + public void endGroupOp() throws HiveException { if (groupKeyIsNotReduceKey) { keysCurrentGroup.clear(); } @@ -1068,10 +1067,9 @@ protected void forward(Object[] keys, * Forward all aggregations to children. It is only used by DemuxOperator. * @throws HiveException */ - @Override - public void flush() throws HiveException{ + protected void flushOp() throws HiveException { try { - if (hashAggregations != null) { + if (hashAggregations != null && !hashAggregations.isEmpty()) { LOG.info("Begin Hash Table flush: size = " + hashAggregations.size()); Iterator iter = hashAggregations.entrySet().iterator(); @@ -1083,17 +1081,11 @@ public void flush() throws HiveException{ iter.remove(); } hashAggregations.clear(); - } else if (aggregations != null) { + } + if (aggregations != null && currentKeys != null) { // sort-based aggregations - if (currentKeys != null) { - forward(currentKeys.getKeyArray(), aggregations); - } + forward(currentKeys.getKeyArray(), aggregations); currentKeys = null; - } else { - // The GroupByOperator is not initialized, which means there is no - // data - // (since we initialize the operators when we see the first record). - // Just do nothing here. } } catch (Exception e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java index 91b2369..a17a854 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableDummyOperator.java @@ -45,6 +45,16 @@ protected void initializeOp(Configuration hconf) throws HiveException { } @Override + protected boolean isGroupStarted() { + return true; + } + + @Override + protected boolean isGroupEnded() { + return true; + } + + @Override public void processOp(Object row, int tag) throws HiveException { throw new HiveException(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java index c747099..470df3f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java @@ -242,7 +242,7 @@ private void mvFileToFinalPath(Path specPath, Configuration hconf, * @throws HiveException */ @Override - public void endGroup() throws HiveException { + public void endGroupOp() throws HiveException { // if this is a skew key, we need to handle it in a separate map reduce job. if (handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) { try { @@ -251,7 +251,6 @@ public void endGroup() throws HiveException { LOG.error(e.getMessage(), e); throw new HiveException(e); } - return; } else { checkAndGenObject(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index e877cd4..80ff2e4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -80,13 +80,11 @@ public MapJoinOperator(AbstractMapJoinOperator mjop) { * Note: The mapjoin can be run in the reducer only on Tez. */ @Override - public void endGroup() throws HiveException { - defaultEndGroup(); + public void endGroupOp() throws HiveException { } @Override - public void startGroup() throws HiveException { - defaultStartGroup(); + public void startGroupOp() throws HiveException { } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java index b10a7fa..ae7c00f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MuxOperator.java @@ -160,7 +160,6 @@ public Object process(Object row) throws HiveException { private transient ObjectInspector[] outputObjectInspectors; private transient int numParents; private transient boolean[] forward; - private transient boolean[] processGroupCalled; private Handler[] handlers; // Counters for debugging, we cannot use existing counters (cntr and nextCntr) @@ -177,13 +176,11 @@ protected void initializeOp(Configuration hconf) throws HiveException { } numParents = getNumParent(); forward = new boolean[numParents]; - processGroupCalled = new boolean[numParents]; outputObjectInspectors = new ObjectInspector[numParents]; handlers = new Handler[numParents]; cntrs = new long[numParents]; nextCntrs = new long[numParents]; for (int i = 0; i < numParents; i++) { - processGroupCalled[i] = false; if (conf.getParentToKeyCols().get(i) == null) { // We do not need to evaluate the input row for this parent. // So, we can just forward it to the child of this MuxOperator. @@ -269,38 +266,6 @@ public void forward(Object row, ObjectInspector rowInspector) } @Override - public void startGroup() throws HiveException{ - for (int i = 0; i < numParents; i++) { - processGroupCalled[i] = false; - } - super.startGroup(); - } - - @Override - public void endGroup() throws HiveException { - // do nothing - } - - @Override - public void processGroup(int tag) throws HiveException { - processGroupCalled[tag] = true; - boolean shouldProceed = true; - for (int i = 0; i < numParents; i++) { - if (!processGroupCalled[i]) { - shouldProceed = false; - break; - } - } - if (shouldProceed) { - Operator child = childOperatorsArray[0]; - int childTag = childOperatorsTag[0]; - child.flush(); - child.endGroup(); - child.processGroup(childTag); - } - } - - @Override protected void closeOp(boolean abort) throws HiveException { for (int i = 0; i < numParents; i++) { LOG.info(id + ", tag=" + i + ", forwarded " + cntrs[i] + " rows"); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index db94271..e29dfd5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -68,6 +68,8 @@ protected String operatorId; private transient ExecMapperContext execContext; + private transient boolean groupStarted; + private static AtomicInteger seqId; // It can be optimized later so that an operator operator (init/close) is performed @@ -488,60 +490,101 @@ public ObjectInspector getOutputObjInspector() { */ public abstract void processOp(Object row, int tag) throws HiveException; - protected final void defaultStartGroup() throws HiveException { - LOG.debug("Starting group"); + protected boolean isGroupStarted() { + return groupStarted; + } - if (childOperators == null) { + protected boolean areAllParentsGroupStarted() { + if (parentOperators != null) { + for (Operator parent : parentOperators) { + if (parent != null && !parent.isGroupStarted()) { + return false; + } + } + } + return true; + } + + public final void startGroup() throws HiveException { + if (!areAllParentsGroupStarted()) { return; } + LOG.debug("Starting group"); + groupStarted = true; + + startGroupOp(); LOG.debug("Starting group for children:"); - for (Operator op : childOperators) { - op.setGroupKeyObjectInspector(groupKeyOI); - op.setGroupKeyObject(groupKeyObject); - op.startGroup(); + if (childOperators != null) { + for (Operator op : childOperators) { + op.setGroupKeyObjectInspector(groupKeyOI); + op.setGroupKeyObject(groupKeyObject); + op.startGroup(); + } } - LOG.debug("Start group Done"); } - protected final void defaultEndGroup() throws HiveException { - LOG.debug("Ending group"); + // If a operator wants to do some work at the beginning of a group + protected void startGroupOp() throws HiveException { + } - if (childOperators == null) { + protected boolean isGroupEnded() { + return !groupStarted; + } + + protected boolean areAllParentsGroupEnded() { + if (parentOperators != null) { + for (Operator parent : parentOperators) { + if (parent != null && !parent.isGroupEnded()) { + return false; + } + } + } + return true; + } + + public final void endGroup() throws HiveException { + if (!areAllParentsGroupEnded()) { return; } + LOG.debug("Ending group"); + + groupStarted = false; + endGroupOp(); LOG.debug("Ending group for children:"); - for (Operator op : childOperators) { - op.endGroup(); + if (childOperators != null) { + for (Operator op : childOperators) { + op.endGroup(); + } } LOG.debug("End group Done"); } - // If a operator wants to do some work at the beginning of a group - public void startGroup() throws HiveException { - defaultStartGroup(); - } - // If an operator wants to do some work at the end of a group - public void endGroup() throws HiveException { - defaultEndGroup(); + protected void endGroupOp() throws HiveException { } - // an blocking operator (e.g. GroupByOperator and JoinOperator) can - // override this method to forward its outputs public void flush() throws HiveException { - } - - public void processGroup(int tag) throws HiveException { - if (childOperators == null || childOperators.isEmpty()) { + if (!areAllParentsGroupEnded()) { return; } - for (int i = 0; i < childOperatorsArray.length; i++) { - childOperatorsArray[i].processGroup(childOperatorsTag[i]); + LOG.debug("Flushing group"); + + flushOp(); + LOG.debug("Flushing group for children:"); + if (childOperators != null) { + for (Operator op : childOperators) { + op.flush(); + } } + + LOG.debug("Flushing group Done"); + } + + protected void flushOp() throws HiveException { } protected boolean allInitializedParentsAreClosed() { @@ -604,7 +647,7 @@ public void close(boolean abort) throws HiveException { /** * Operator specific close routine. Operators which inherents this class - * should overwrite this funtion for their specific cleanup routine. + * should overwrite this function for their specific cleanup routine. */ protected void closeOp(boolean abort) throws HiveException { } @@ -758,7 +801,7 @@ public boolean removeChildren(int depth) { } /** - * Replace one parent with another at the same position. Chilren of the new + * Replace one parent with another at the same position. Children of the new * parent are not updated * * @param parent diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java index c52f753..686ba72 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java @@ -459,7 +459,8 @@ private boolean sameOrder(String order1, String order2) { CorrelationUtilities.getSingleChild(rsop, GroupByOperator.class); if (cGBY != null) { if (CorrelationUtilities.hasGroupingSet(rsop) || - cGBY.getConf().isGroupingSetsPresent()) { + cGBY.getConf().isGroupingSetsPresent() || + cGBY.getConf().getGroupKeyNotReductionKey()) { // Do not support grouping set right now isCorrelated = false; } @@ -536,7 +537,8 @@ private boolean sameOrder(String order1, String order2) { CorrelationUtilities.getSingleChild(op, GroupByOperator.class); if (cGBY != null) { if (CorrelationUtilities.hasGroupingSet(op) || - cGBY.getConf().isGroupingSetsPresent()) { + cGBY.getConf().isGroupingSetsPresent() || + cGBY.getConf().getGroupKeyNotReductionKey()) { // Do not support grouping set right now shouldDetect = false; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeCaptureOutputOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeCaptureOutputOperator.java index 43458d9..283cdb3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeCaptureOutputOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/FakeCaptureOutputOperator.java @@ -20,7 +20,6 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import org.apache.hadoop.conf.Configuration; diff --git ql/src/test/queries/clientpositive/correlationoptimizer16.q ql/src/test/queries/clientpositive/correlationoptimizer16.q new file mode 100644 index 0000000..970ff8a --- /dev/null +++ ql/src/test/queries/clientpositive/correlationoptimizer16.q @@ -0,0 +1,19 @@ +create table TBL (a string, b string); +insert into table TBL select 'a','a' from src tablesample (1 rows); + +set hive.optimize.correlation=true; + +explain +select b, sum(cc) from ( + select b,count(1) as cc from TBL group by b + union all + select a as b,count(1) as cc from TBL group by a +) z +group by b; + +select b, sum(cc) from ( + select b,count(1) as cc from TBL group by b + union all + select a as b,count(1) as cc from TBL group by a +) z +group by b; diff --git ql/src/test/results/clientpositive/correlationoptimizer15.q.out ql/src/test/results/clientpositive/correlationoptimizer15.q.out index eda3c0c..24724c4 100644 --- ql/src/test/results/clientpositive/correlationoptimizer15.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer15.q.out @@ -364,39 +364,17 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### 128 1 128 -128 1 128 -128 1 128 -146 1 146 146 1 146 150 1 150 213 1 213 -213 1 213 -224 1 224 224 1 224 238 1 238 -238 1 238 255 1 255 -255 1 255 -273 1 273 -273 1 273 273 1 273 278 1 278 -278 1 278 -311 1 311 311 1 311 -311 1 311 -369 1 369 -369 1 369 369 1 369 401 1 401 -401 1 401 -401 1 401 -401 1 401 -401 1 401 -406 1 406 -406 1 406 -406 1 406 406 1 406 66 1 66 98 1 98 -98 1 98 diff --git ql/src/test/results/clientpositive/correlationoptimizer16.q.out ql/src/test/results/clientpositive/correlationoptimizer16.q.out new file mode 100644 index 0000000..fc0e296 --- /dev/null +++ ql/src/test/results/clientpositive/correlationoptimizer16.q.out @@ -0,0 +1,178 @@ +PREHOOK: query: create table TBL (a string, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table TBL (a string, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TBL +PREHOOK: query: insert into table TBL select 'a','a' from src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl +POSTHOOK: query: insert into table TBL select 'a','a' from src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl +POSTHOOK: Lineage: tbl.a SIMPLE [] +POSTHOOK: Lineage: tbl.b SIMPLE [] +PREHOOK: query: explain +select b, sum(cc) from ( + select b,count(1) as cc from TBL group by b + union all + select a as b,count(1) as cc from TBL group by a +) z +group by b +PREHOOK: type: QUERY +POSTHOOK: query: explain +select b, sum(cc) from ( + select b,count(1) as cc from TBL group by b + union all + select a as b,count(1) as cc from TBL group by a +) z +group by b +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: string) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: a (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + alias: tbl + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: b (type: string) + outputColumnNames: b + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Demux Operator + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select b, sum(cc) from ( + select b,count(1) as cc from TBL group by b + union all + select a as b,count(1) as cc from TBL group by a +) z +group by b +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl +#### A masked pattern was here #### +POSTHOOK: query: select b, sum(cc) from ( + select b,count(1) as cc from TBL group by b + union all + select a as b,count(1) as cc from TBL group by a +) z +group by b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl +#### A masked pattern was here #### +a 2