diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 01c1d30..8219cae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -580,6 +580,22 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Map colExprMap = gop.getColumnExprMap(); RowSchema rs = gop.getSchema(); Statistics stats = null; + boolean mapSide = false; + int multiplier = mapSideParallelism; + long newNumRows; + long newDataSize; + + // map side + if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) { + + mapSide = true; + + // map-side grouping set present. if grouping set is present then + // multiply the number of rows by number of elements in grouping set + if (gop.getConf().isGroupingSetsPresent()) { + multiplier *= gop.getConf().getListGroupingSets().size(); + } + } try { if (satisfyPrecondition(parentStats)) { @@ -589,7 +605,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs); stats.setColumnStats(colStats); long dvProd = 1; - long newNumRows = 0; // compute product of distinct values of grouping columns for (ColStatistics cs : colStats) { @@ -617,7 +632,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } // map side - if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) { + if (mapSide) { // since we do not know if hash-aggregation will be enabled or disabled // at runtime we will assume that map-side group by does not do any @@ -626,14 +641,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // map-side grouping set present. 
if grouping set is present then // multiply the number of rows by number of elements in grouping set if (gop.getConf().isGroupingSetsPresent()) { - int multiplier = gop.getConf().getListGroupingSets().size(); - - // take into account the map-side parallelism as well, default is 1 - multiplier *= mapSideParallelism; newNumRows = multiplier * stats.getNumRows(); - long dataSize = multiplier * stats.getDataSize(); + newDataSize = multiplier * stats.getDataSize(); stats.setNumRows(newNumRows); - stats.setDataSize(dataSize); + stats.setDataSize(newDataSize); for (ColStatistics cs : colStats) { if (cs != null) { long oldNumNulls = cs.getNumNulls(); @@ -644,7 +655,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } else { // map side no grouping set - newNumRows = stats.getNumRows() * mapSideParallelism; + newNumRows = stats.getNumRows() * multiplier; updateStats(stats, newNumRows, true); } } else { @@ -656,16 +667,20 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } else { if (parentStats != null) { + stats = parentStats.clone(); + // worst case, in the absence of column statistics assume half the rows are emitted - if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) { + if (mapSide) { // map side - stats = parentStats.clone(); + newNumRows = multiplier * stats.getNumRows(); + newDataSize = multiplier * stats.getDataSize(); + stats.setNumRows(newNumRows); + stats.setDataSize(newDataSize); } else { // reduce side - stats = parentStats.clone(); - long newNumRows = parentStats.getNumRows() / 2; + newNumRows = parentStats.getNumRows() / 2; updateStats(stats, newNumRows, false); } } diff --git ql/src/test/queries/clientpositive/annotate_stats_groupby.q ql/src/test/queries/clientpositive/annotate_stats_groupby.q index e8e84c6..1c0829d 100644 --- ql/src/test/queries/clientpositive/annotate_stats_groupby.q +++ ql/src/test/queries/clientpositive/annotate_stats_groupby.q @@ -67,3 +67,33 @@ explain select year from loc_orc group by year; -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7) explain select state,locid from loc_orc group by state,locid with cube; +set hive.stats.fetch.column.stats=false; +set hive.stats.map.parallelism=1; + +-- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain select state,locid from loc_orc group by state,locid with cube; + +-- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain select state,locid from loc_orc group by state,locid with rollup; + +-- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain select state,locid from loc_orc group by state,locid grouping sets((state)); + +-- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid)); + +-- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()); + +-- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()); + +set hive.stats.map.parallelism=10; + +-- map-side GBY: numRows: 80 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) +explain select year from loc_orc group by year; + +-- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. 
numRows = min(320/2, 6*7) +explain select state,locid from loc_orc group by state,locid with cube; + diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index 9c37d9b..871c4217 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -756,3 +756,445 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain select state,locid from loc_orc group by state,locid with cube +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain select state,locid from loc_orc group by state,locid with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain select state,locid from loc_orc group by state,locid with rollup +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain select state,locid from loc_orc group by state,locid with rollup +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0' (type: string) + mode: hash + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain select state,locid from loc_orc group by state,locid grouping sets((state)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 8 reduce-side GBY numRows: 4 +explain select state,locid from loc_orc group by state,locid grouping sets((state)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY 
numRows: 8 +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 16 reduce-side GBY numRows: 8 +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 24 reduce-side GBY numRows: 12 +explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),()) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 24 Data size: 2388 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group 
By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1194 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 32 reduce-side GBY numRows: 16 +explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),()) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 32 Data size: 3184 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1592 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2) +explain select year from loc_orc group by year +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY: numRows: 80 (map-side will not do any reduction) +-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. 
numRows = min(80/2, 2) +explain select year from loc_orc group by year +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: year (type: int) + outputColumnNames: year + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: year (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 80 Data size: 7960 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 40 Data size: 3980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7) +explain select state,locid from loc_orc group by state,locid with cube +PREHOOK: type: QUERY +POSTHOOK: query: -- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. 
numRows = min(320/2, 6*7) +explain select state,locid from loc_orc group by state,locid with cube +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: state (type: string), locid (type: int) + outputColumnNames: state, locid + Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: state (type: string), locid (type: int), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 320 Data size: 31840 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 160 Data size: 15920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out index 4246744..0f840d6 100644 --- ql/src/test/results/clientpositive/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -44,12 +44,12 @@ STAGE PLANS: keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -128,12 +128,12 @@ STAGE PLANS: keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num 
rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) @@ -200,12 +200,12 @@ STAGE PLANS: keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -213,7 +213,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -229,7 +229,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -308,12 +308,12 @@ STAGE PLANS: keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) @@ -405,12 +405,12 @@ STAGE PLANS: keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Select Operator expressions: key (type: string), val (type: string) @@ -421,7 +421,7 @@ STAGE PLANS: keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data 
size: 120 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -434,7 +434,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -450,7 +450,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -493,7 +493,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -501,7 +501,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -517,7 +517,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 120 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out index 1cd65f6..75aaddc 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets2.q.out @@ -56,7 +56,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -72,7 +72,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -162,7 +162,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 36 Basic stats: 
PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -178,7 +178,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 144 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: double) Reduce Operator Tree: Group By Operator @@ -290,7 +290,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -306,7 +306,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -314,14 +314,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index f1ecbb4..a1842c1 100644 --- ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -62,12 +62,12 @@ STAGE PLANS: keys: a (type: string), b (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Reduce Operator Tree: Group By Operator @@ 
-162,7 +162,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -178,7 +178,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 288 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/groupby_rollup1.q.out ql/src/test/results/clientpositive/groupby_rollup1.q.out index 5db5cd5..827b081 100644 --- ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -44,12 +44,12 @@ STAGE PLANS: keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -122,12 +122,12 @@ STAGE PLANS: keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) @@ -194,12 +194,12 @@ STAGE PLANS: keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -207,7 +207,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -223,7 +223,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -296,12 +296,12 @@ STAGE PLANS: keys: key (type: string), '0' (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 60 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) @@ -393,12 +393,12 @@ STAGE PLANS: keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Select Operator expressions: key (type: string), val (type: string) @@ -409,7 +409,7 @@ STAGE PLANS: keys: key (type: string), val (type: string), '0' (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -422,7 +422,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -438,7 +438,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -481,7 +481,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: rand() (type: 
double) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator @@ -489,7 +489,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: @@ -505,7 +505,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 0 Data size: 90 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) Reduce Operator Tree: Group By Operator
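A minimal, self-contained sketch of the row-count rule this patch encodes for GROUP BY statistics annotation, useful for sanity-checking the expected numbers in annotate_stats_groupby.q. This is not the Hive implementation: the class and method names below are illustrative only. In the actual patch the multiplier comes from the hive.stats.map.parallelism setting and, when grouping sets are present, from gop.getConf().getListGroupingSets().size(), and the same multiplier is also applied to the data size.

public final class GroupByRowEstimate {

  // Map-side GBY: assume hash aggregation does no reduction, so scale the
  // parent row count up by map-side parallelism times the number of grouping
  // sets (1 when no grouping sets are present).
  static long mapSideRows(long parentNumRows, int mapSideParallelism, int groupingSetCount) {
    long multiplier = (long) mapSideParallelism * Math.max(groupingSetCount, 1);
    return parentNumRows * multiplier;
  }

  // Reduce-side GBY without column statistics: worst case, assume half the
  // incoming rows are emitted.
  static long reduceSideRows(long parentNumRows) {
    return parentNumRows / 2;
  }

  public static void main(String[] args) {
    // CUBE over (state, locid) expands to 4 grouping sets; loc_orc has 8 rows.
    System.out.println(mapSideRows(8, 1, 4));   // 32  -> map-side GBY numRows in the test
    System.out.println(reduceSideRows(32));     // 16  -> reduce-side GBY numRows in the test

    // With hive.stats.map.parallelism=10 and no grouping sets:
    System.out.println(mapSideRows(8, 10, 1));  // 80  -> map-side GBY numRows
    System.out.println(reduceSideRows(80));     // 40  -> reduce-side GBY numRows without column stats
  }
}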