diff --git accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out index 6621a4e204..f4c9fa4702 100644 --- accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out +++ accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out @@ -35,12 +35,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-5 depends on stages: Stage-3, Stage-2, Stage-1 Stage-1 is a root stage Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 Stage-7 Stage-3 depends on stages: Stage-7, Stage-6, Stage-9 - Stage-5 depends on stages: Stage-3 Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 @@ -58,6 +58,14 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -83,6 +91,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +119,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.src_x2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -118,9 +152,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 - Stage: Stage-5 - Stats-Aggr Operator - Stage: Stage-6 Map Reduce Map Operator Tree: diff --git common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java index 7c27d07024..45f1afec5d 100644 --- common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java +++ common/src/java/org/apache/hadoop/hive/common/StatsSetupConst.java @@ -257,6 +257,14 @@ public static void setColumnStatsState(Map params, List } } + public static boolean canColumnStatsMerge(Map params, String colName) { + if (params == null) { + return false; + } + 
ColumnStatsAccurate stats = parseStatsAcc(params.get(COLUMN_STATS_ACCURATE)); + return stats.columnStats.containsKey(colName); + } + public static void clearColumnStatsState(Map params) { if (params == null) { return; @@ -294,7 +302,9 @@ public static void setStatsStateForCreateTable(Map params, } } setBasicStatsState(params, setting); - setColumnStatsState(params, cols); + if (TRUE.equals(setting)) { + setColumnStatsState(params, cols); + } } private static ColumnStatsAccurate parseStatsAcc(String statsAcc) { diff --git common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java index b7dc88c939..bcbfe977dd 100644 --- common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java +++ common/src/java/org/apache/hadoop/hive/common/jsonexplain/Vertex.java @@ -249,7 +249,8 @@ public void print(Printer printer, int indentFlag, String type, Vertex callingVe // find the right op Op choose = null; for (Op op : this.outputOps) { - if (op.outputVertexName.equals(callingVertex.name)) { + // op.outputVertexName may be null + if (callingVertex.name.equals(op.outputVertexName)) { choose = op; } } diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 24c5db0e47..99984a848b 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1681,7 +1681,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "This many percentage of rows will be estimated as number of nulls in absence of statistics."), HIVESTATSAUTOGATHER("hive.stats.autogather", true, "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."), - HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", false, + HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", true, "A flag to gather column statistics automatically."), HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("custom", "fs"), "The storage that stores temporary Hive statistics. 
In filesystem based statistics collection ('fs'), \n" + diff --git contrib/src/test/results/clientpositive/serde_typedbytes.q.out contrib/src/test/results/clientpositive/serde_typedbytes.q.out index 6876ca8775..c9d0fb6b63 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes.q.out @@ -72,6 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -93,7 +119,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git contrib/src/test/results/clientpositive/serde_typedbytes2.q.out contrib/src/test/results/clientpositive/serde_typedbytes2.q.out index 79cf8fe1e5..715dc95f8b 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes2.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes2.q.out @@ -68,6 +68,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -89,7 +115,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: smallint, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git contrib/src/test/results/clientpositive/serde_typedbytes3.q.out contrib/src/test/results/clientpositive/serde_typedbytes3.q.out index fec58ef026..b66d5043b9 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes3.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes3.q.out @@ -68,6 +68,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -89,7 +115,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git contrib/src/test/results/clientpositive/serde_typedbytes4.q.out contrib/src/test/results/clientpositive/serde_typedbytes4.q.out index 1131478a7b..45a209223a 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes4.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes4.q.out @@ -77,6 +77,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 
Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,7 +109,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 PREHOOK: query: FROM ( FROM src diff --git contrib/src/test/results/clientpositive/serde_typedbytes5.q.out contrib/src/test/results/clientpositive/serde_typedbytes5.q.out index 8d3b95ece8..4beb35a243 100644 --- contrib/src/test/results/clientpositive/serde_typedbytes5.q.out +++ contrib/src/test/results/clientpositive/serde_typedbytes5.q.out @@ -72,6 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -93,7 +119,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git data/conf/hive-site.xml data/conf/hive-site.xml index a205b8c569..05d6e8ac0e 100644 --- data/conf/hive-site.xml +++ data/conf/hive-site.xml @@ -302,12 +302,15 @@ true - hive.llap.io.allocator.direct false + + hive.stats.column.autogather + true + hive.materializedview.rewriting diff --git hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out index 68a417d0c1..6498960afb 100644 --- hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out +++ hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out @@ -35,12 +35,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-5 depends on stages: Stage-3, Stage-2, Stage-1 Stage-1 is a root stage Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 Stage-7 Stage-3 depends on stages: Stage-7, Stage-6, Stage-9 - Stage-5 depends on stages: Stage-3 Stage-6 Stage-8 
Stage-9 depends on stages: Stage-8 @@ -58,6 +58,14 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -83,6 +91,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +119,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.src_x2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -118,9 +152,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 - Stage: Stage-5 - Stats-Aggr Operator - Stage: Stage-6 Map Reduce Map Operator Tree: diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out index e55b1c257e..bd31bd0cab 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out @@ -109,6 +109,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tmp_values_col1":"true","tmp_values_col2":"true"}} bucket_count -1 column.name.delimiter , columns tmp_values_col1,tmp_values_col2 @@ -116,14 +117,19 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.values__tmp__table__3 + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct values__tmp__table__3 { string tmp_values_col1, string tmp_values_col2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tmp_values_col1":"true","tmp_values_col2":"true"}} bucket_count -1 column.name.delimiter , columns tmp_values_col1,tmp_values_col2 @@ -131,9 +137,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.values__tmp__table__3 + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct values__tmp__table__3 { string tmp_values_col1, string tmp_values_col2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.values__tmp__table__3 name: default.values__tmp__table__3 @@ -208,8 +218,14 @@ STAGE PLANS: name: default.table1 Stage: Stage-2 - Stats-Aggr Operator - Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: false PREHOOK: query: DROP TABLE table1 PREHOOK: type: DROPTABLE diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out index f50f4af817..bed71d3a9f 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out @@ -73,7 +73,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -95,6 +95,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: id + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(id, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -104,6 +120,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tmp_values_col1":"true"}} bucket_count -1 column.name.delimiter , columns tmp_values_col1 @@ -111,14 +128,19 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.values__tmp__table__3 + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct values__tmp__table__3 { string tmp_values_col1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"tmp_values_col1":"true"}} bucket_count -1 column.name.delimiter , columns tmp_values_col1 @@ -126,14 +148,47 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.values__tmp__table__3 + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct values__tmp__table__3 { string tmp_values_col1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.values__tmp__table__3 name: default.values__tmp__table__3 Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -147,7 +202,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -168,8 +223,14 @@ STAGE PLANS: name: default.table1 Stage: Stage-2 - Stats-Aggr Operator - Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -185,7 +246,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -216,7 +277,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -238,7 +299,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -270,7 +331,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -304,7 +365,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -335,7 +396,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -357,7 +418,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out index 6e95fd123c..17db9dbd44 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out @@ -107,7 +107,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -129,7 +129,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -358,7 +358,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -380,7 +380,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out index 660cebba5f..bdefb5c2dd 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out +++ 
itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out @@ -127,6 +127,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tmp_values_col1":"true","tmp_values_col2":"true"}} bucket_count -1 column.name.delimiter , columns tmp_values_col1,tmp_values_col2 @@ -134,14 +135,19 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.values__tmp__table__3 + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct values__tmp__table__3 { string tmp_values_col1, string tmp_values_col2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tmp_values_col1":"true","tmp_values_col2":"true"}} bucket_count -1 column.name.delimiter , columns tmp_values_col1,tmp_values_col2 @@ -149,9 +155,13 @@ STAGE PLANS: columns.types string:string #### A masked pattern was here #### name default.values__tmp__table__3 + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct values__tmp__table__3 { string tmp_values_col1, string tmp_values_col2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.values__tmp__table__3 name: default.values__tmp__table__3 @@ -226,8 +236,14 @@ STAGE PLANS: name: default.table1 Stage: Stage-2 - Stats-Aggr Operator - Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: false PREHOOK: query: DROP TABLE table1 PREHOOK: type: DROPTABLE diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out index ba0e83d562..f33acd1712 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out @@ -81,7 +81,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -103,6 +103,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: id + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(id, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: 
#### A masked pattern was here #### Path -> Partition: @@ -112,6 +128,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tmp_values_col1":"true"}} bucket_count -1 column.name.delimiter , columns tmp_values_col1 @@ -119,14 +136,19 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.values__tmp__table__3 + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct values__tmp__table__3 { string tmp_values_col1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"tmp_values_col1":"true"}} bucket_count -1 column.name.delimiter , columns tmp_values_col1 @@ -134,14 +156,47 @@ STAGE PLANS: columns.types string #### A masked pattern was here #### name default.values__tmp__table__3 + numFiles 0 + numRows 0 + rawDataSize 0 serialization.ddl struct values__tmp__table__3 { string tmp_values_col1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.values__tmp__table__3 name: default.values__tmp__table__3 Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -155,7 +210,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -176,8 +231,14 @@ STAGE PLANS: name: default.table1 Stage: Stage-2 - Stats-Aggr Operator - Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -193,7 +254,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -224,7 +285,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -246,7 +307,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -278,7 +339,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -312,7 +373,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -343,7 +404,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -365,7 +426,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id diff --git itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out index 2ababb1eec..5cb42b9ff9 100644 --- itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out @@ -214,6 +214,41 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE +#### A 
masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -245,8 +280,14 @@ STAGE PLANS: name: default.blobstore_table Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key + Column Types: int + Table: default.blobstore_table + Is Table Level Stats: true PREHOOK: query: EXPLAIN EXTENDED FROM hdfs_table INSERT OVERWRITE TABLE blobstore_table SELECT hdfs_table.key GROUP BY hdfs_table.key ORDER BY hdfs_table.key PREHOOK: type: QUERY @@ -438,6 +479,41 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -469,8 +545,14 @@ STAGE PLANS: name: default.blobstore_table Stage: Stage-3 - Stats-Aggr Operator - Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Column Stats Desc: + Columns: key + Column Types: int + Table: default.blobstore_table + Is Table Level Stats: true PREHOOK: query: DROP TABLE hdfs_table PREHOOK: type: DROPTABLE diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java index ad2baa2e26..e8ef4b97d6 100644 --- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java +++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java @@ -44,6 +44,7 @@ public void testMTQueries1() throws Exception { util.getConf().setBoolean("hive.exec.submit.local.task.via.child", true); util.getConf().set("hive.stats.dbclass", "fs"); util.getConf().set("hive.mapred.mode", "nonstrict"); + 
util.getConf().set("hive.stats.column.autogather", "false"); } boolean success = QTestUtil.queryListRunnerMultiThreaded(qfiles, qts); if (!success) { diff --git itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index 4a9af80fdc..96173c014e 100644 --- itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -201,6 +201,7 @@ private static void startMiniHS2(HiveConf conf) throws Exception { private static void startMiniHS2(HiveConf conf, boolean httpMode) throws Exception { conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); conf.setBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED, false); + conf.setBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER, false); MiniHS2.Builder builder = new MiniHS2.Builder().withConf(conf).cleanupLocalDirOnStartup(false); if (httpMode) { builder = builder.withHTTPTransport(); diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index d9e760f4b7..2d20b039f2 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -111,6 +111,9 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ auto_sortmerge_join_7.q,\ auto_sortmerge_join_8.q,\ auto_sortmerge_join_9.q,\ + autoColumnStats_1.q,\ + autoColumnStats_10.q,\ + autoColumnStats_2.q,\ bucket2.q,\ bucket3.q,\ bucket4.q,\ @@ -471,8 +474,6 @@ minillaplocal.query.files=\ auto_sortmerge_join_6.q,\ auto_sortmerge_join_8.q,\ auto_sortmerge_join_9.q,\ - autoColumnStats_1.q,\ - autoColumnStats_2.q,\ bucket4.q,\ bucket_groupby.q,\ bucket_many.q,\ diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java index b279e1d567..3dbe72fef4 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java @@ -248,7 +248,7 @@ public void alterTable(RawStore msdb, Warehouse wh, String dbname, part.setDbName(newDbName); part.setTableName(newTblName); ColumnStatistics colStats = updateOrGetPartitionColumnStats(msdb, dbname, name, - part.getValues(), part.getSd().getCols(), oldt, part); + part.getValues(), part.getSd().getCols(), oldt, part, null); if (colStats != null) { columnStatsNeedUpdated.put(part, colStats); } @@ -287,7 +287,7 @@ public void alterTable(RawStore msdb, Warehouse wh, String dbname, List oldCols = part.getSd().getCols(); part.getSd().setCols(newt.getSd().getCols()); ColumnStatistics colStats = updateOrGetPartitionColumnStats(msdb, dbname, name, - part.getValues(), oldCols, oldt, part); + part.getValues(), oldCols, oldt, part, null); assert(colStats == null); msdb.alterPartition(dbname, name, part.getValues(), part); } @@ -296,6 +296,17 @@ public void alterTable(RawStore msdb, Warehouse wh, String dbname, LOG.warn("Alter table does not cascade changes to its partitions."); } } else { + if (isPartitionedTable + && !MetaStoreUtils.areSameColumns(oldt.getSd().getCols(), newt.getSd().getCols())) { + parts = msdb.getPartitions(dbname, name, -1); + for (Partition part : parts) { + List oldCols = part.getSd().getCols(); + ColumnStatistics colStats = updateOrGetPartitionColumnStats(msdb, dbname, name, + part.getValues(), oldCols, oldt, part, newt.getSd().getCols()); + assert (colStats == 
null); + msdb.alterPartition(dbname, name, part.getValues(), part); + } + } alterTableUpdateTableColumnStats(msdb, oldt, newt); } } @@ -412,7 +423,7 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String // PartitionView does not have SD. We do not need update its column stats if (oldPart.getSd() != null) { updateOrGetPartitionColumnStats(msdb, dbname, name, new_part.getValues(), - oldPart.getSd().getCols(), tbl, new_part); + oldPart.getSd().getCols(), tbl, new_part, null); } msdb.alterPartition(dbname, name, new_part.getValues(), new_part); if (transactionalListeners != null && !transactionalListeners.isEmpty()) { @@ -539,7 +550,7 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String String newPartName = Warehouse.makePartName(tbl.getPartitionKeys(), new_part.getValues()); ColumnStatistics cs = updateOrGetPartitionColumnStats(msdb, dbname, name, oldPart.getValues(), - oldPart.getSd().getCols(), tbl, new_part); + oldPart.getSd().getCols(), tbl, new_part, null); msdb.alterPartition(dbname, name, part_vals, new_part); if (cs != null) { cs.getStatsDesc().setPartName(newPartName); @@ -637,7 +648,7 @@ public Partition alterPartition(final RawStore msdb, Warehouse wh, final String // PartitionView does not have SD and we do not need to update its column stats if (oldTmpPart.getSd() != null) { updateOrGetPartitionColumnStats(msdb, dbname, name, oldTmpPart.getValues(), - oldTmpPart.getSd().getCols(), tbl, tmpPart); + oldTmpPart.getSd().getCols(), tbl, tmpPart, null); } } @@ -789,12 +800,14 @@ void alterTableUpdateTableColumnStats(RawStore msdb, Table oldTable, Table newTa private ColumnStatistics updateOrGetPartitionColumnStats( RawStore msdb, String dbname, String tblname, List partVals, - List oldCols, Table table, Partition part) + List oldCols, Table table, Partition part, List newCols) throws MetaException, InvalidObjectException { ColumnStatistics newPartsColStats = null; try { - List newCols = part.getSd() == null ? - new ArrayList() : part.getSd().getCols(); + // if newCols are not specified, use default ones. + if (newCols == null) { + newCols = part.getSd() == null ? 
new ArrayList<FieldSchema>() : part.getSd().getCols();
+      }
       String oldPartName = Warehouse.makePartName(table.getPartitionKeys(), partVals);
       String newPartName = Warehouse.makePartName(table.getPartitionKeys(), part.getValues());
       boolean rename = !part.getDbName().equals(dbname) || !part.getTableName().equals(tblname)
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 5812a1bf4f..4b6c47232a 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -35,6 +35,7 @@
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -63,6 +64,7 @@
 import com.google.common.collect.ImmutableListMultimap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Multimaps;
+
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -6964,39 +6966,80 @@ public boolean set_aggr_stats_for(SetPartitionsStatsRequest request)
         if (request.isSetNeedMerge() && request.isNeedMerge()) {
           // one single call to get all column stats
           ColumnStatistics csOld = getMS().getTableColumnStatistics(dbName, tableName, colNames);
-          if (csOld != null && csOld.getStatsObjSize() != 0) {
+          Table t = getTable(dbName, tableName);
+          // we first use t.getParameters() to prune the stats
+          MetaStoreUtils.pruneColumnStats(firstColStats, t.getParameters());
+          // we merge those that can be merged
+          if (csOld != null && csOld.getStatsObjSize() != 0
+              && !firstColStats.getStatsObj().isEmpty()) {
             MetaStoreUtils.mergeColStats(firstColStats, csOld);
           }
+          if (!firstColStats.getStatsObj().isEmpty()) {
+            return update_table_column_statistics(firstColStats);
+          } else {
+            LOG.debug("All the column stats are not accurate to merge.");
+            return true;
+          }
+        } else {
+          // This is the overwrite case, we do not care about the accuracy.
+          return update_table_column_statistics(firstColStats);
         }
-        return update_table_column_statistics(firstColStats);
       }
     } else {
       // partition level column stats merging
-      List<String> partitionNames = new ArrayList<>();
+      List<Partition> partitions = new ArrayList<>();
+      // note that we may have two or more duplicate partition names.
+      // see autoColumnStats_2.q under TestMiniLlapLocalCliDriver
+      Map<String, ColumnStatistics> newStatsMap = new HashMap<>();
       for (ColumnStatistics csNew : csNews) {
-        partitionNames.add(csNew.getStatsDesc().getPartName());
+        String partName = csNew.getStatsDesc().getPartName();
+        if (newStatsMap.containsKey(partName)) {
+          MetaStoreUtils.mergeColStats(csNew, newStatsMap.get(partName));
+        }
+        newStatsMap.put(partName, csNew);
       }
-      Map<String, ColumnStatistics> map = new HashMap<>();
+
+      Map<String, ColumnStatistics> oldStatsMap = new HashMap<>();
+      Map<String, Partition> mapToPart = new HashMap<>();
       if (request.isSetNeedMerge() && request.isNeedMerge()) {
         // a single call to get all column stats for all partitions
+        List<String> partitionNames = new ArrayList<>();
+        partitionNames.addAll(newStatsMap.keySet());
         List<ColumnStatistics> csOlds = getMS().getPartitionColumnStatistics(dbName, tableName,
             partitionNames, colNames);
-        if (csNews.size() != csOlds.size()) {
+        if (newStatsMap.values().size() != csOlds.size()) {
           // some of the partitions miss stats.
           LOG.debug("Some of the partitions miss stats.");
         }
         for (ColumnStatistics csOld : csOlds) {
-          map.put(csOld.getStatsDesc().getPartName(), csOld);
+          oldStatsMap.put(csOld.getStatsDesc().getPartName(), csOld);
+        }
+        // another single call to get all the partition objects
+        partitions = getMS().getPartitionsByNames(dbName, tableName, partitionNames);
+        for (int index = 0; index < partitionNames.size(); index++) {
+          mapToPart.put(partitionNames.get(index), partitions.get(index));
         }
       }
       Table t = getTable(dbName, tableName);
-      for (int index = 0; index < csNews.size(); index++) {
-        ColumnStatistics csNew = csNews.get(index);
-        ColumnStatistics csOld = map.get(csNew.getStatsDesc().getPartName());
-        if (csOld != null && csOld.getStatsObjSize() != 0) {
-          MetaStoreUtils.mergeColStats(csNew, csOld);
+      for (Entry<String, ColumnStatistics> entry : newStatsMap.entrySet()) {
+        ColumnStatistics csNew = entry.getValue();
+        ColumnStatistics csOld = oldStatsMap.get(entry.getKey());
+        if (request.isSetNeedMerge() && request.isNeedMerge()) {
+          // we first use getParameters() to prune the stats
+          MetaStoreUtils.pruneColumnStats(csNew, mapToPart.get(entry.getKey()).getParameters());
+          // we merge those that can be merged
+          if (csOld != null && csOld.getStatsObjSize() != 0 && !csNew.getStatsObj().isEmpty()) {
+            MetaStoreUtils.mergeColStats(csNew, csOld);
+          }
+          if (!csNew.getStatsObj().isEmpty()) {
+            ret = ret && updatePartitonColStats(t, csNew);
+          } else {
+            LOG.debug("All the column stats " + csNew.getStatsDesc().getPartName()
+                + " are not accurate to merge.");
+          }
+        } else {
+          ret = ret && updatePartitonColStats(t, csNew);
         }
-        ret = ret && updatePartitonColStats(t, csNew);
       }
     }
     return ret;
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index bbe13fd77b..f2cd1d7895 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -1903,6 +1903,19 @@ public static String encodeTableName(String name) {
     return sb.toString();
   }
 
+  public static void pruneColumnStats(ColumnStatistics csNew, Map<String, String> parameters) {
+    List<ColumnStatisticsObj> list = new ArrayList<>();
+    for (int index = 0; index < csNew.getStatsObj().size(); index++) {
+      ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index);
+      // canColumnStatsMerge guarantees that it is accurate before we do merge
+      if (StatsSetupConst.canColumnStatsMerge(parameters, statsObjNew.getColName())) {
+        list.add(statsObjNew);
+      }
+      // in all the other cases, we can not merge
+    }
+    csNew.setStatsObj(list);
+  }
+
   // this function will merge csOld into csNew.
   public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld)
       throws InvalidObjectException {
+  // this function will merge csOld into csNew.
   public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld)
       throws InvalidObjectException {
@@ -1926,13 +1939,20 @@
       ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index);
       ColumnStatisticsObj statsObjOld = map.get(statsObjNew.getColName());
       if (statsObjOld != null) {
+        // because we already confirmed that the stats are accurate,
+        // it is impossible that the column types have been changed while the
+        // column stats are still accurate.
+        assert (statsObjNew.getStatsData().getSetField() == statsObjOld.getStatsData()
+            .getSetField());
         // If statsObjOld is found, we can merge.
         ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew,
             statsObjOld);
         merger.merge(statsObjNew, statsObjOld);
       }
+      // If statsObjOld is not found, we just use statsObjNew as it is accurate.
       list.add(statsObjNew);
     }
+    // in all the other cases, we cannot merge
     csNew.setStatsObj(list);
   }
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 3053dcb50b..baeee8be71 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -7081,7 +7081,7 @@ public boolean updateTableColumnStatistics(ColumnStatistics colStats)
       MTableColumnStatistics mStatsObj = StatObjectConverter.convertToMTableColumnStatistics(
           ensureGetMTable(statsDesc.getDbName(), statsDesc.getTableName()), statsDesc, statsObj);
       writeMTableColumnStatistics(table, mStatsObj, oldStats.get(statsObj.getColName()));
-      colNames.add(statsObj.getColName());
+      // There is no need to add the column name again; otherwise we would get duplicate colNames.
     }
 
     // Set the table properties
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
index 66be524139..0dc2aa2fee 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java
@@ -70,7 +70,7 @@ public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsOb
       break;
     }
     default:
-      throw new IllegalArgumentException("Unknown stats type " + typeNew.toString());
+      throw new IllegalArgumentException("Unknown stats type " + statsObjNew.getStatsData().getSetField());
     }
     return agg;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
index f43992c85d..eb94b939f1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
@@ -18,11 +18,12 @@
 package org.apache.hadoop.hive.ql;
 
+import org.apache.hadoop.hive.ql.exec.StatsTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.NodeUtils;
 import org.apache.hadoop.hive.ql.exec.NodeUtils.Function;
 import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.StatsTask;
+import org.apache.hadoop.hive.ql.exec.BasicStatsTask;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskRunner;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
@@ -64,7 +65,7 @@
   private Context ctx;
   private boolean shutdown;
 
-  final Map<String, StatsTask> statsTasks = new HashMap<String, StatsTask>(1);
+  final Map<String, StatsTask> statsTasks = new HashMap<>(1);
 
   public DriverContext() {
   }
@@ -191,7 +192,9 @@ public void prepare(QueryPlan plan) {
     NodeUtils.iterateTask(rootTasks, StatsTask.class, new Function<StatsTask>() {
       @Override
       public void apply(StatsTask statsTask) {
-        statsTasks.put(statsTask.getWork().getAggKey(), statsTask);
+        if (statsTask.getWork().getBasicStatsWork() != null) {
+          statsTasks.put(statsTask.getWork().getBasicStatsWork().getAggKey(), statsTask);
+        }
       }
     });
   }
@@ -221,7 +224,7 @@ public void apply(FileSinkOperator fsOp) {
       }
     });
     for (String statKey : statKeys) {
-      statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
+      statsTasks.get(statKey).getWork().getBasicStatsWork().setSourceTask(mapredTask);
     }
   }
 }
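Both ends of the aggKey index changed shape here: StatsTask's work is now a StatsWork wrapper whose BasicStatsWork part is optional, so only tasks that actually aggregate basic stats are registered, and the source MapReduce task is wired through the same nested accessor. A condensed view of the two call sites above (illustrative, using the patch's own accessors):

    // registration, during DriverContext.prepare()
    if (statsTask.getWork().getBasicStatsWork() != null) {
      statsTasks.put(statsTask.getWork().getBasicStatsWork().getAggKey(), statsTask);
    }

    // wiring, once the producing MapRedTask is known
    statsTasks.get(statKey).getWork().getBasicStatsWork().setSourceTask(mapredTask);

A column-stats-only StatsTask has no BasicStatsWork and therefore no aggregation key, which is presumably why the null check replaced the unconditional put.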
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsNoJobTask.java
similarity index 98%
rename from ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
rename to ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsNoJobTask.java
index 9c3a664b9a..beb68f2703 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsNoJobTask.java
@@ -48,7 +48,7 @@
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
-import org.apache.hadoop.hive.ql.plan.StatsNoJobWork;
+import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputFormat;
@@ -71,16 +71,16 @@
  * rows. This task can be used for computing basic stats like numFiles, numRows, fileSize,
  * rawDataSize from ORC footer.
  **/
-public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable {
+public class BasicStatsNoJobTask extends Task<BasicStatsNoJobWork> implements Serializable {
 
   private static final long serialVersionUID = 1L;
-  private static transient final Logger LOG = LoggerFactory.getLogger(StatsNoJobTask.class);
+  private static transient final Logger LOG = LoggerFactory.getLogger(BasicStatsNoJobTask.class);
   private ConcurrentMap<String, Partition> partUpdates;
   private Table table;
   private String tableFullName;
   private JobConf jc = null;
 
-  public StatsNoJobTask() {
+  public BasicStatsNoJobTask() {
     super();
   }
@@ -141,7 +141,6 @@ public void run() {
         // get the list of partitions
         org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition();
         Map<String, String> parameters = tPart.getParameters();
-
         try {
           Path dir = new Path(tPart.getSd().getLocation());
           long numRows = 0;
@@ -174,6 +173,7 @@
           }
 
           if (statsAvailable) {
+
             parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
             parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
             parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize));
@@ -280,7 +280,6 @@ private int aggregateStats(ExecutorService threadPool, Hive db) {
       parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
       EnvironmentContext environmentContext = new EnvironmentContext();
       environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, StatsSetupConst.TASK);
-
       db.alterTable(tableFullName, new Table(tTable), environmentContext);
 
       String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']';
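For context on the rename: BasicStatsNoJobTask fills numRows/rawDataSize from file footers instead of launching a job, as its javadoc above says. A rough sketch of the per-file accumulation for ORC, assuming the older OrcFile.createReader(FileSystem, Path) overload (the reader API differs across Hive/ORC versions, so treat this as illustrative):

    import org.apache.hadoop.hive.ql.io.orc.OrcFile;
    import org.apache.hadoop.hive.ql.io.orc.Reader;

    Reader reader = OrcFile.createReader(fs, file.getPath());
    numRows += reader.getNumberOfRows();       // row count straight from the ORC footer
    rawDataSize += reader.getRawDataSize();    // decompressed logical size, also footer metadata
    fileSize += file.getLen();                 // physical bytes from the FileStatus
    numFiles += 1;
    // statsAvailable stays true only if every file in the partition yields footer stats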
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsTask.java
new file mode 100644
index 0000000000..d3119040bd
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/BasicStatsTask.java
@@ -0,0 +1,519 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
+import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
+import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.plan.BasicStatsWork;
+import org.apache.hadoop.hive.ql.plan.api.StageType;
+import org.apache.hadoop.hive.ql.stats.StatsAggregator;
+import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
+import org.apache.hadoop.hive.ql.stats.StatsFactory;
+import org.apache.hadoop.hive.ql.stats.StatsPublisher;
+import org.apache.hadoop.util.StringUtils;
+
+import com.google.common.collect.Lists;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
+
+/**
+ * BasicStatsTask implementation. BasicStatsTask mainly deals with "collectable" stats. These are
+ * stats that require data scanning and are collected during query execution (unless the user
+ * explicitly requests data scanning just for the purpose of stats computation using the "ANALYZE"
+ * command). All other stats are computed directly by the MetaStore. The rationale is that the
+ * MetaStore layer covers all Thrift calls and provides better guarantees about the accuracy of
+ * those stats.
+ **/
+public class BasicStatsTask extends Task<BasicStatsWork> implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+  private static transient final Logger LOG = LoggerFactory.getLogger(BasicStatsTask.class);
+
+  private Table table;
+  private Collection<Partition> dpPartSpecs;
+
+  public BasicStatsTask() {
+    super();
+    dpPartSpecs = null;
+  }
+
+  @Override
+  public int execute(DriverContext driverContext) {
+    if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
+      return 0;
+    }
+    LOG.info("Executing stats task");
+    // Make sure that it is either an ANALYZE, INSERT OVERWRITE (maybe load) or CTAS command
+    short workComponentsPresent = 0;
+    if (work.getLoadTableDesc() != null) {
+      workComponentsPresent++;
+    }
+    if (work.getTableSpecs() != null) {
+      workComponentsPresent++;
+    }
+    if (work.getLoadFileDesc() != null) {
+      workComponentsPresent++;
+    }
+
+    assert (workComponentsPresent == 1);
+
+    String tableName = "";
+    Hive hive = getHive();
+    try {
+      if (work.getLoadTableDesc() != null) {
+        tableName = work.getLoadTableDesc().getTable().getTableName();
+      } else if (work.getTableSpecs() != null) {
+        tableName = work.getTableSpecs().tableName;
+      } else {
+        tableName = work.getLoadFileDesc().getDestinationCreateTable();
+      }
+
+      table = hive.getTable(tableName);
+
+    } catch (HiveException e) {
+      LOG.error("Cannot get table " + tableName, e);
+      console.printError("Cannot get table " + tableName, e.toString());
+    }
+
+    return aggregateStats(hive);
+
+  }
+
+  @Override
+  public StageType getType() {
+    return StageType.STATS;
+  }
+
+  @Override
+  public String getName() {
+    return "STATS";
+  }
+
+  private int aggregateStats(Hive db) {
+
+    StatsAggregator statsAggregator = null;
+    int ret = 0;
+    StatsCollectionContext scc = null;
+    EnvironmentContext environmentContext = null;
+    try {
+      // Stats setup:
+      final Warehouse wh = new Warehouse(conf);
+      if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) {
+        try {
+          scc = getContext();
+          statsAggregator = createStatsAggregator(scc, conf);
+        } catch (HiveException e) {
+          if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) {
+            throw e;
+          }
+          console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString()));
+        }
+      }
+
+      List<Partition> partitions = getPartitionsList(db);
+      boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC);
+
+      String tableFullName = table.getDbName() + "." + table.getTableName();
+
+      if (partitions == null) {
+        org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable();
+        Map<String, String> parameters = tTable.getParameters();
+        // In the following scenarios, we need to reset the stats to true.
+        // work.getTableSpecs() != null means analyze command
+        // work.getLoadTableDesc().getReplace() is true means insert overwrite command
+        // work.getLoadFileDesc().getDestinationCreateTable().isEmpty() means CTAS etc.
+        // acidTable will not have accurate stats unless it is set through analyze command.
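Condensed, the reset rules spelled out in the comments above (and repeated per partition further down) are: an ACID table outside of ANALYZE can never claim accurate basic stats, while ANALYZE, INSERT OVERWRITE, and CTAS may mark them accurate. The if-chain that follows implements exactly this; restated with named booleans for readability (sketch only):

    boolean analyze   = work.getTableSpecs() != null;
    boolean overwrite = work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace();
    boolean ctas      = work.getLoadFileDesc() != null
        && !work.getLoadFileDesc().getDestinationCreateTable().isEmpty();

    if (!analyze && AcidUtils.isAcidTable(table)) {
      StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE);
    } else if (analyze || overwrite || ctas) {
      StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE);
    }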
+ if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) { + StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); + } else if (work.getTableSpecs() != null + || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) + || (work.getLoadFileDesc() != null && !work.getLoadFileDesc() + .getDestinationCreateTable().isEmpty())) { + StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE); + } + // work.getTableSpecs() == null means it is not analyze command + // and then if it is not followed by column stats, we should clean + // column stats + if (work.getTableSpecs() == null && !work.isFollowedByColStats()) { + StatsSetupConst.clearColumnStatsState(parameters); + } + // non-partitioned tables: + if (!existStats(parameters) && atomic) { + return 0; + } + + // The collectable stats for the aggregator needs to be cleared. + // For eg. if a file is being loaded, the old number of rows are not valid + if (work.isClearAggregatorStats()) { + // we choose to keep the invalid stats and only change the setting. + StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); + } + + updateQuickStats(wh, parameters, tTable.getSd()); + if (StatsSetupConst.areBasicStatsUptoDate(parameters)) { + if (statsAggregator != null) { + String prefix = getAggregationPrefix(table, null); + updateStats(statsAggregator, parameters, prefix, atomic); + } + // write table stats to metastore + if (!getWork().getNoStatsAggregator()) { + environmentContext = new EnvironmentContext(); + environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, + StatsSetupConst.TASK); + } + } + + getHive().alterTable(tableFullName, new Table(tTable), environmentContext); + if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { + console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); + } + LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); + } else { + // Partitioned table: + // Need to get the old stats of the partition + // and update the table stats based on the old and new stats. + List updates = new ArrayList(); + + //Get the file status up-front for all partitions. Beneficial in cases of blob storage systems + final Map fileStatusMap = new ConcurrentHashMap(); + int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1); + // In case thread count is set to 0, use single thread. + poolSize = Math.max(poolSize, 1); + final ExecutorService pool = Executors.newFixedThreadPool(poolSize, + new ThreadFactoryBuilder().setDaemon(true) + .setNameFormat("stats-updater-thread-%d") + .build()); + final List> futures = Lists.newLinkedList(); + LOG.debug("Getting file stats of all partitions. 
threadpool size:" + poolSize); + try { + for(final Partition partn : partitions) { + final String partitionName = partn.getName(); + final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition(); + Map parameters = tPart.getParameters(); + + if (!existStats(parameters) && atomic) { + continue; + } + futures.add(pool.submit(new Callable() { + @Override + public Void call() throws Exception { + FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd()); + fileStatusMap.put(partitionName, partfileStatus); + return null; + } + })); + } + pool.shutdown(); + for(Future future : futures) { + future.get(); + } + } catch (InterruptedException e) { + LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks"); + //cancel other futures + for (Future future : futures) { + future.cancel(true); + } + // Fail the query if the stats are supposed to be reliable + if (work.isStatsReliable()) { + ret = 1; + } + } finally { + if (pool != null) { + pool.shutdownNow(); + } + LOG.debug("Finished getting file stats of all partitions"); + } + + for (Partition partn : partitions) { + // + // get the old partition stats + // + org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition(); + Map parameters = tPart.getParameters(); + if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) { + StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); + } else if (work.getTableSpecs() != null + || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) + || (work.getLoadFileDesc() != null && !work.getLoadFileDesc() + .getDestinationCreateTable().isEmpty())) { + StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE); + } + // work.getTableSpecs() == null means it is not analyze command + // and then if it is not followed by column stats, we should clean + // column stats + if (work.getTableSpecs() == null && !work.isFollowedByColStats()) { + StatsSetupConst.clearColumnStatsState(parameters); + } + //only when the stats exist, it is added to fileStatusMap + if (!fileStatusMap.containsKey(partn.getName())) { + continue; + } + + // The collectable stats for the aggregator needs to be cleared. + // For eg. if a file is being loaded, the old number of rows are not valid + if (work.isClearAggregatorStats()) { + // we choose to keep the invalid stats and only change the setting. 
+ StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); + } + + updateQuickStats(parameters, fileStatusMap.get(partn.getName())); + if (StatsSetupConst.areBasicStatsUptoDate(parameters)) { + if (statsAggregator != null) { + String prefix = getAggregationPrefix(table, partn); + updateStats(statsAggregator, parameters, prefix, atomic); + } + if (!getWork().getNoStatsAggregator()) { + environmentContext = new EnvironmentContext(); + environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, + StatsSetupConst.TASK); + } + } + updates.add(new Partition(table, tPart)); + + if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { + console.printInfo("Partition " + tableFullName + partn.getSpec() + + " stats: [" + toString(parameters) + ']'); + } + LOG.info("Partition " + tableFullName + partn.getSpec() + + " stats: [" + toString(parameters) + ']'); + } + if (!updates.isEmpty()) { + db.alterPartitions(tableFullName, updates, environmentContext); + } + } + + } catch (Exception e) { + console.printInfo("[Warning] could not update stats.", + "Failed with exception " + e.getMessage() + "\n" + + StringUtils.stringifyException(e)); + + // Fail the query if the stats are supposed to be reliable + if (work.isStatsReliable()) { + ret = 1; + } + } finally { + if (statsAggregator != null) { + statsAggregator.closeConnection(scc); + } + } + // The return value of 0 indicates success, + // anything else indicates failure + return ret; + } + + private String getAggregationPrefix(Table table, Partition partition) + throws MetaException { + + // prefix is of the form dbName.tblName + String prefix = table.getDbName() + "." + MetaStoreUtils.encodeTableName(table.getTableName()); + if (partition != null) { + return Utilities.join(prefix, Warehouse.makePartPath(partition.getSpec())); + } + return prefix; + } + + private StatsAggregator createStatsAggregator(StatsCollectionContext scc, HiveConf conf) throws HiveException { + String statsImpl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS); + StatsFactory factory = StatsFactory.newFactory(statsImpl, conf); + if (factory == null) { + throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg()); + } + // initialize stats publishing table for noscan which has only stats task + // the rest of MR task following stats task initializes it in ExecDriver.java + StatsPublisher statsPublisher = factory.getStatsPublisher(); + if (!statsPublisher.init(scc)) { // creating stats table if not exists + throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg()); + } + + // manufacture a StatsAggregator + StatsAggregator statsAggregator = factory.getStatsAggregator(); + if (!statsAggregator.connect(scc)) { + throw new HiveException(ErrorMsg.STATSAGGREGATOR_CONNECTION_ERROR.getErrorCodedMsg(statsImpl)); + } + return statsAggregator; + } + + private StatsCollectionContext getContext() throws HiveException { + + StatsCollectionContext scc = new StatsCollectionContext(conf); + Task sourceTask = getWork().getSourceTask(); + if (sourceTask == null) { + throw new HiveException(ErrorMsg.STATSAGGREGATOR_SOURCETASK_NULL.getErrorCodedMsg()); + } + scc.setTask(sourceTask); + scc.setStatsTmpDir(this.getWork().getStatsTmpDir()); + return scc; + } + + private boolean existStats(Map parameters) { + return parameters.containsKey(StatsSetupConst.ROW_COUNT) + || parameters.containsKey(StatsSetupConst.NUM_FILES) + || parameters.containsKey(StatsSetupConst.TOTAL_SIZE) + || parameters.containsKey(StatsSetupConst.RAW_DATA_SIZE) + 
|| parameters.containsKey(StatsSetupConst.NUM_PARTITIONS); + } + + private void updateStats(StatsAggregator statsAggregator, + Map parameters, String prefix, boolean atomic) + throws HiveException { + + String aggKey = prefix.endsWith(Path.SEPARATOR) ? prefix : prefix + Path.SEPARATOR; + + for (String statType : StatsSetupConst.statsRequireCompute) { + String value = statsAggregator.aggregateStats(aggKey, statType); + if (value != null && !value.isEmpty()) { + long longValue = Long.parseLong(value); + + if (work.getLoadTableDesc() != null && + !work.getLoadTableDesc().getReplace()) { + String originalValue = parameters.get(statType); + if (originalValue != null) { + longValue += Long.parseLong(originalValue); // todo: invalid + valid = invalid + } + } + parameters.put(statType, String.valueOf(longValue)); + } else { + if (atomic) { + throw new HiveException(ErrorMsg.STATSAGGREGATOR_MISSED_SOMESTATS, statType); + } + } + } + } + + private void updateQuickStats(Warehouse wh, Map parameters, + StorageDescriptor desc) throws MetaException { + /** + * calculate fast statistics + */ + FileStatus[] partfileStatus = wh.getFileStatusesForSD(desc); + updateQuickStats(parameters, partfileStatus); + } + + private void updateQuickStats(Map parameters, + FileStatus[] partfileStatus) throws MetaException { + MetaStoreUtils.populateQuickStats(partfileStatus, parameters); + } + + private String toString(Map parameters) { + StringBuilder builder = new StringBuilder(); + for (String statType : StatsSetupConst.supportedStats) { + String value = parameters.get(statType); + if (value != null) { + if (builder.length() > 0) { + builder.append(", "); + } + builder.append(statType).append('=').append(value); + } + } + return builder.toString(); + } + + /** + * Get the list of partitions that need to update statistics. + * TODO: we should reuse the Partitions generated at compile time + * since getting the list of partitions is quite expensive. + * + * @return a list of partitions that need to update statistics. 
+ * @throws HiveException + */ + private List getPartitionsList(Hive db) throws HiveException { + if (work.getLoadFileDesc() != null) { + return null; //we are in CTAS, so we know there are no partitions + } + + List list = new ArrayList(); + + if (work.getTableSpecs() != null) { + + // ANALYZE command + TableSpec tblSpec = work.getTableSpecs(); + table = tblSpec.tableHandle; + if (!table.isPartitioned()) { + return null; + } + // get all partitions that matches with the partition spec + List partitions = tblSpec.partitions; + if (partitions != null) { + for (Partition partn : partitions) { + list.add(partn); + } + } + } else if (work.getLoadTableDesc() != null) { + + // INSERT OVERWRITE command + LoadTableDesc tbd = work.getLoadTableDesc(); + table = db.getTable(tbd.getTable().getTableName()); + if (!table.isPartitioned()) { + return null; + } + DynamicPartitionCtx dpCtx = tbd.getDPCtx(); + if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions + // If no dynamic partitions are generated, dpPartSpecs may not be initialized + if (dpPartSpecs != null) { + // load the list of DP partitions and return the list of partition specs + list.addAll(dpPartSpecs); + } + } else { // static partition + Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); + list.add(partn); + } + } + return list; + } + + public Collection getDpPartSpecs() { + return dpPartSpecs; + } + + public void setDpPartSpecs(Collection dpPartSpecs) { + this.dpPartSpecs = dpPartSpecs; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java deleted file mode 100644 index 2b2c004fea..0000000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java +++ /dev/null @@ -1,452 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec; - -import java.io.IOException; -import java.io.Serializable; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; -import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatistics; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.Date; -import org.apache.hadoop.hive.metastore.api.Decimal; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; -import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; -import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; -import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.DriverContext; -import org.apache.hadoop.hive.ql.QueryPlan; -import org.apache.hadoop.hive.ql.QueryState; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; -import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; -import org.apache.hadoop.hive.ql.plan.api.StageType; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.util.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * ColumnStatsTask implementation. 
- **/ - -public class ColumnStatsTask extends Task implements Serializable { - private static final long serialVersionUID = 1L; - private FetchOperator ftOp; - private static transient final Logger LOG = LoggerFactory.getLogger(ColumnStatsTask.class); - - public ColumnStatsTask() { - super(); - } - - @Override - public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext ctx, - CompilationOpContext opContext) { - super.initialize(queryState, queryPlan, ctx, opContext); - work.initializeForFetch(opContext); - try { - JobConf job = new JobConf(conf); - ftOp = new FetchOperator(work.getfWork(), job); - } catch (Exception e) { - LOG.error(StringUtils.stringifyException(e)); - throw new RuntimeException(e); - } - } - - private void unpackBooleanStats(ObjectInspector oi, Object o, String fName, - ColumnStatisticsObj statsObj) { - long v = ((LongObjectInspector) oi).get(o); - if (fName.equals("counttrues")) { - statsObj.getStatsData().getBooleanStats().setNumTrues(v); - } else if (fName.equals("countfalses")) { - statsObj.getStatsData().getBooleanStats().setNumFalses(v); - } else if (fName.equals("countnulls")) { - statsObj.getStatsData().getBooleanStats().setNumNulls(v); - } - } - - @SuppressWarnings("serial") - class UnsupportedDoubleException extends Exception { - } - - private void unpackDoubleStats(ObjectInspector oi, Object o, String fName, - ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDoubleStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDoubleStats().setNumDVs(v); - } else if (fName.equals("max")) { - double d = ((DoubleObjectInspector) oi).get(o); - if (Double.isInfinite(d) || Double.isNaN(d)) { - throw new UnsupportedDoubleException(); - } - statsObj.getStatsData().getDoubleStats().setHighValue(d); - } else if (fName.equals("min")) { - double d = ((DoubleObjectInspector) oi).get(o); - if (Double.isInfinite(d) || Double.isNaN(d)) { - throw new UnsupportedDoubleException(); - } - statsObj.getStatsData().getDoubleStats().setLowValue(d); - } else if (fName.equals("ndvbitvector")) { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDoubleStats().setBitVectors(buf); - } - } - - private void unpackDecimalStats(ObjectInspector oi, Object o, String fName, - ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDecimalStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDecimalStats().setNumDVs(v); - } else if (fName.equals("max")) { - HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(d)); - } else if (fName.equals("min")) { - HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d)); - } else if (fName.equals("ndvbitvector")) { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDecimalStats().setBitVectors(buf); - } - } - - private 
Decimal convertToThriftDecimal(HiveDecimal d) { - return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short)d.scale()); - } - - private void unpackLongStats(ObjectInspector oi, Object o, String fName, - ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setNumDVs(v); - } else if (fName.equals("max")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setHighValue(v); - } else if (fName.equals("min")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getLongStats().setLowValue(v); - } else if (fName.equals("ndvbitvector")) { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getLongStats().setBitVectors(buf); - } - } - - private void unpackStringStats(ObjectInspector oi, Object o, String fName, - ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setNumDVs(v); - } else if (fName.equals("avglength")) { - double d = ((DoubleObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setAvgColLen(d); - } else if (fName.equals("maxlength")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getStringStats().setMaxColLen(v); - } else if (fName.equals("ndvbitvector")) { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getStringStats().setBitVectors(buf); - } - } - - private void unpackBinaryStats(ObjectInspector oi, Object o, String fName, - ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getBinaryStats().setNumNulls(v); - } else if (fName.equals("avglength")) { - double d = ((DoubleObjectInspector) oi).get(o); - statsObj.getStatsData().getBinaryStats().setAvgColLen(d); - } else if (fName.equals("maxlength")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getBinaryStats().setMaxColLen(v); - } - } - - private void unpackDateStats(ObjectInspector oi, Object o, String fName, - ColumnStatisticsObj statsObj) { - if (fName.equals("countnulls")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDateStats().setNumNulls(v); - } else if (fName.equals("numdistinctvalues")) { - long v = ((LongObjectInspector) oi).get(o); - statsObj.getStatsData().getDateStats().setNumDVs(v); - } else if (fName.equals("max")) { - DateWritable v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setHighValue(new Date(v.getDays())); - } else if (fName.equals("min")) { - DateWritable v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); - statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays())); - } else if (fName.equals("ndvbitvector")) { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - byte[] buf = ((BinaryObjectInspector) 
poi).getPrimitiveJavaObject(o); - statsObj.getStatsData().getDateStats().setBitVectors(buf); - } - } - - private void unpackPrimitiveObject (ObjectInspector oi, Object o, String fieldName, - ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { - if (o == null) { - return; - } - // First infer the type of object - if (fieldName.equals("columntype")) { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); - ColumnStatisticsData statsData = new ColumnStatisticsData(); - - if (s.equalsIgnoreCase("long")) { - LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); - statsData.setLongStats(longStats); - statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("double")) { - DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); - statsData.setDoubleStats(doubleStats); - statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("string")) { - StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); - statsData.setStringStats(stringStats); - statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("boolean")) { - BooleanColumnStatsData booleanStats = new BooleanColumnStatsData(); - statsData.setBooleanStats(booleanStats); - statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("binary")) { - BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); - statsData.setBinaryStats(binaryStats); - statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("decimal")) { - DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); - statsData.setDecimalStats(decimalStats); - statsObj.setStatsData(statsData); - } else if (s.equalsIgnoreCase("date")) { - DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); - statsData.setDateStats(dateStats); - statsObj.setStatsData(statsData); - } - } else { - // invoke the right unpack method depending on data type of the column - if (statsObj.getStatsData().isSetBooleanStats()) { - unpackBooleanStats(oi, o, fieldName, statsObj); - } else if (statsObj.getStatsData().isSetLongStats()) { - unpackLongStats(oi, o, fieldName, statsObj); - } else if (statsObj.getStatsData().isSetDoubleStats()) { - unpackDoubleStats(oi,o,fieldName, statsObj); - } else if (statsObj.getStatsData().isSetStringStats()) { - unpackStringStats(oi, o, fieldName, statsObj); - } else if (statsObj.getStatsData().isSetBinaryStats()) { - unpackBinaryStats(oi, o, fieldName, statsObj); - } else if (statsObj.getStatsData().isSetDecimalStats()) { - unpackDecimalStats(oi, o, fieldName, statsObj); - } else if (statsObj.getStatsData().isSetDateStats()) { - unpackDateStats(oi, o, fieldName, statsObj); - } - } - } - - private void unpackStructObject(ObjectInspector oi, Object o, String fName, - ColumnStatisticsObj cStatsObj) throws UnsupportedDoubleException { - if (oi.getCategory() != ObjectInspector.Category.STRUCT) { - throw new RuntimeException("Invalid object datatype : " + oi.getCategory().toString()); - } - - StructObjectInspector soi = (StructObjectInspector) oi; - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(o); - - for (int i = 0; i < fields.size(); i++) { - // Get the field objectInspector, fieldName and the field object. - ObjectInspector foi = fields.get(i).getFieldObjectInspector(); - Object f = (list == null ? 
null : list.get(i)); - String fieldName = fields.get(i).getFieldName(); - - if (foi.getCategory() == ObjectInspector.Category.PRIMITIVE) { - unpackPrimitiveObject(foi, f, fieldName, cStatsObj); - } else { - unpackStructObject(foi, f, fieldName, cStatsObj); - } - } - } - - private List constructColumnStatsFromPackedRows( - Hive db) throws HiveException, MetaException, IOException { - - String currentDb = SessionState.get().getCurrentDatabase(); - String tableName = work.getColStats().getTableName(); - String partName = null; - List colName = work.getColStats().getColName(); - List colType = work.getColStats().getColType(); - boolean isTblLevel = work.getColStats().isTblLevel(); - - List stats = new ArrayList(); - InspectableObject packedRow; - Table tbl = db.getTable(currentDb, tableName); - while ((packedRow = ftOp.getNextRow()) != null) { - if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) { - throw new HiveException("Unexpected object type encountered while unpacking row"); - } - - List statsObjs = new ArrayList(); - StructObjectInspector soi = (StructObjectInspector) packedRow.oi; - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(packedRow.o); - - List partColSchema = tbl.getPartCols(); - // Partition columns are appended at end, we only care about stats column - int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size(); - for (int i = 0; i < numOfStatCols; i++) { - // Get the field objectInspector, fieldName and the field object. - ObjectInspector foi = fields.get(i).getFieldObjectInspector(); - Object f = (list == null ? null : list.get(i)); - String fieldName = fields.get(i).getFieldName(); - ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); - statsObj.setColName(colName.get(i)); - statsObj.setColType(colType.get(i)); - try { - unpackStructObject(foi, f, fieldName, statsObj); - statsObjs.add(statsObj); - } catch (UnsupportedDoubleException e) { - // due to infinity or nan. - LOG.info("Because " + colName.get(i) + " is infinite or NaN, we skip stats."); - } - } - - if (!isTblLevel) { - List partVals = new ArrayList(); - // Iterate over partition columns to figure out partition name - for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) { - Object partVal = ((PrimitiveObjectInspector)fields.get(i).getFieldObjectInspector()). - getPrimitiveJavaObject(list.get(i)); - partVals.add(partVal == null ? 
// could be null for default partition - this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) : partVal.toString()); - } - partName = Warehouse.makePartName(partColSchema, partVals); - } - String [] names = Utilities.getDbTableName(currentDb, tableName); - ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, isTblLevel); - ColumnStatistics colStats = new ColumnStatistics(); - colStats.setStatsDesc(statsDesc); - colStats.setStatsObj(statsObjs); - if (!statsObjs.isEmpty()) { - stats.add(colStats); - } - } - ftOp.clearFetchContext(); - return stats; - } - - private ColumnStatisticsDesc getColumnStatsDesc(String dbName, String tableName, - String partName, boolean isTblLevel) - { - ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(); - statsDesc.setDbName(dbName); - statsDesc.setTableName(tableName); - statsDesc.setIsTblLevel(isTblLevel); - - if (!isTblLevel) { - statsDesc.setPartName(partName); - } else { - statsDesc.setPartName(null); - } - return statsDesc; - } - - private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException { - // Construct a column statistics object from the result - List colStats = constructColumnStatsFromPackedRows(db); - // Persist the column statistics object to the metastore - // Note, this function is shared for both table and partition column stats. - if (colStats.isEmpty()) { - return 0; - } - SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats); - if (work.getColStats() != null && work.getColStats().getNumBitVector() > 0) { - request.setNeedMerge(true); - } - db.setPartitionColumnStatistics(request); - return 0; - } - - @Override - public int execute(DriverContext driverContext) { - if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) { - return 0; - } - try { - Hive db = getHive(); - return persistColumnStats(db); - } catch (Exception e) { - LOG.error("Failed to run column stats task", e); - } - return 1; - } - - @Override - public StageType getType() { - return StageType.COLUMNSTATS; - } - - @Override - public String getName() { - return "COLUMNSTATS TASK"; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java index c22d69bb19..c0b796579e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/StatsTask.java @@ -16,493 +16,484 @@ * limitations under the License. 
*/ - package org.apache.hadoop.hive.ql.exec; +import java.io.IOException; import java.io.Serializable; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.StatsSetupConst; -import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Date; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; +import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; +import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.DriverContext; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState; -import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; -import org.apache.hadoop.hive.ql.plan.LoadTableDesc; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.api.StageType; -import org.apache.hadoop.hive.ql.stats.StatsAggregator; -import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; -import org.apache.hadoop.hive.ql.stats.StatsFactory; -import org.apache.hadoop.hive.ql.stats.StatsPublisher; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.serde2.io.DateWritable; +import 
org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; - -import com.google.common.collect.Lists; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * StatsTask implementation. StatsTask mainly deals with "collectable" stats. These are - * stats that require data scanning and are collected during query execution (unless the user - * explicitly requests data scanning just for the purpose of stats computation using the "ANALYZE" - * command. All other stats are computed directly by the MetaStore. The rationale being that the - * MetaStore layer covers all Thrift calls and provides better guarantees about the accuracy of - * those stats. + * StatsTask implementation. **/ -public class StatsTask extends Task implements Serializable { +public class StatsTask extends Task implements Serializable { private static final long serialVersionUID = 1L; + private FetchOperator ftOp; private static transient final Logger LOG = LoggerFactory.getLogger(StatsTask.class); - private Table table; - private Collection dpPartSpecs; - public StatsTask() { super(); - dpPartSpecs = null; } @Override - protected void receiveFeed(FeedType feedType, Object feedValue) { - // this method should be called by MoveTask when there are dynamic partitions generated - if (feedType == FeedType.DYNAMIC_PARTITIONS) { - dpPartSpecs = (Collection) feedValue; + public void initialize(QueryState queryState, QueryPlan queryPlan, DriverContext ctx, + CompilationOpContext opContext) { + super.initialize(queryState, queryPlan, ctx, opContext); + if (work.getfWork() != null) { + work.initializeForFetch(opContext); + try { + JobConf job = new JobConf(conf); + ftOp = new FetchOperator(work.getfWork(), job); + } catch (Exception e) { + LOG.error(StringUtils.stringifyException(e)); + throw new RuntimeException(e); + } } } - @Override - public int execute(DriverContext driverContext) { - if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) { - return 0; - } - LOG.info("Executing stats task"); - // Make sure that it is either an ANALYZE, INSERT OVERWRITE (maybe load) or CTAS command - short workComponentsPresent = 0; - if (work.getLoadTableDesc() != null) { - workComponentsPresent++; - } - if (work.getTableSpecs() != null) { - workComponentsPresent++; - } - if (work.getLoadFileDesc() != null) { - workComponentsPresent++; + private void unpackBooleanStats(ObjectInspector oi, Object o, String fName, + ColumnStatisticsObj statsObj) { + long v = ((LongObjectInspector) oi).get(o); + if (fName.equals("counttrues")) { + 
statsObj.getStatsData().getBooleanStats().setNumTrues(v); + } else if (fName.equals("countfalses")) { + statsObj.getStatsData().getBooleanStats().setNumFalses(v); + } else if (fName.equals("countnulls")) { + statsObj.getStatsData().getBooleanStats().setNumNulls(v); } + } - assert (workComponentsPresent == 1); + @SuppressWarnings("serial") + class UnsupportedDoubleException extends Exception { + } - String tableName = ""; - Hive hive = getHive(); - try { - if (work.getLoadTableDesc() != null) { - tableName = work.getLoadTableDesc().getTable().getTableName(); - } else if (work.getTableSpecs() != null){ - tableName = work.getTableSpecs().tableName; - } else { - tableName = work.getLoadFileDesc().getDestinationCreateTable(); + private void unpackDoubleStats(ObjectInspector oi, Object o, String fName, + ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { + if (fName.equals("countnulls")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDoubleStats().setNumNulls(v); + } else if (fName.equals("numdistinctvalues")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDoubleStats().setNumDVs(v); + } else if (fName.equals("max")) { + double d = ((DoubleObjectInspector) oi).get(o); + if (Double.isInfinite(d) || Double.isNaN(d)) { + throw new UnsupportedDoubleException(); } + statsObj.getStatsData().getDoubleStats().setHighValue(d); + } else if (fName.equals("min")) { + double d = ((DoubleObjectInspector) oi).get(o); + if (Double.isInfinite(d) || Double.isNaN(d)) { + throw new UnsupportedDoubleException(); + } + statsObj.getStatsData().getDoubleStats().setLowValue(d); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDoubleStats().setBitVectors(buf); + ; + } + } - table = hive.getTable(tableName); - - } catch (HiveException e) { - LOG.error("Cannot get table " + tableName, e); - console.printError("Cannot get table " + tableName, e.toString()); + private void unpackDecimalStats(ObjectInspector oi, Object o, String fName, + ColumnStatisticsObj statsObj) { + if (fName.equals("countnulls")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumNulls(v); + } else if (fName.equals("numdistinctvalues")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDecimalStats().setNumDVs(v); + } else if (fName.equals("max")) { + HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setHighValue(convertToThriftDecimal(d)); + } else if (fName.equals("min")) { + HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d)); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDecimalStats().setBitVectors(buf); + ; } + } - return aggregateStats(hive); + private Decimal convertToThriftDecimal(HiveDecimal d) { + return new Decimal(ByteBuffer.wrap(d.unscaledValue().toByteArray()), (short) d.scale()); + } + private void unpackLongStats(ObjectInspector oi, Object o, String fName, + ColumnStatisticsObj statsObj) { + if (fName.equals("countnulls")) { + long v = ((LongObjectInspector) oi).get(o); + 
statsObj.getStatsData().getLongStats().setNumNulls(v); + } else if (fName.equals("numdistinctvalues")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setNumDVs(v); + } else if (fName.equals("max")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setHighValue(v); + } else if (fName.equals("min")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getLongStats().setLowValue(v); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getLongStats().setBitVectors(buf); + ; + } } - @Override - public StageType getType() { - return StageType.STATS; + private void unpackStringStats(ObjectInspector oi, Object o, String fName, + ColumnStatisticsObj statsObj) { + if (fName.equals("countnulls")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setNumNulls(v); + } else if (fName.equals("numdistinctvalues")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setNumDVs(v); + } else if (fName.equals("avglength")) { + double d = ((DoubleObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setAvgColLen(d); + } else if (fName.equals("maxlength")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getStringStats().setMaxColLen(v); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getStringStats().setBitVectors(buf); + ; + } } - @Override - public String getName() { - return "STATS"; + private void unpackBinaryStats(ObjectInspector oi, Object o, String fName, + ColumnStatisticsObj statsObj) { + if (fName.equals("countnulls")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getBinaryStats().setNumNulls(v); + } else if (fName.equals("avglength")) { + double d = ((DoubleObjectInspector) oi).get(o); + statsObj.getStatsData().getBinaryStats().setAvgColLen(d); + } else if (fName.equals("maxlength")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getBinaryStats().setMaxColLen(v); + } } - private int aggregateStats(Hive db) { + private void unpackDateStats(ObjectInspector oi, Object o, String fName, + ColumnStatisticsObj statsObj) { + if (fName.equals("countnulls")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDateStats().setNumNulls(v); + } else if (fName.equals("numdistinctvalues")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getDateStats().setNumDVs(v); + } else if (fName.equals("max")) { + DateWritable v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getDateStats().setHighValue(new Date(v.getDays())); + } else if (fName.equals("min")) { + DateWritable v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays())); + } else if (fName.equals("ndvbitvector")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getDateStats().setBitVectors(buf); + ; + } + } - StatsAggregator statsAggregator = null; - int ret = 0; - StatsCollectionContext scc = null; - 
EnvironmentContext environmentContext = null; - try { - // Stats setup: - final Warehouse wh = new Warehouse(conf); - if (!getWork().getNoStatsAggregator() && !getWork().isNoScanAnalyzeCommand()) { - try { - scc = getContext(); - statsAggregator = createStatsAggregator(scc, conf); - } catch (HiveException e) { - if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) { - throw e; - } - console.printError(ErrorMsg.STATS_SKIPPING_BY_ERROR.getErrorCodedMsg(e.toString())); - } + private void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, + ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { + if (o == null) { + return; + } + // First infer the type of object + if (fieldName.equals("columntype")) { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + String s = ((StringObjectInspector) poi).getPrimitiveJavaObject(o); + ColumnStatisticsData statsData = new ColumnStatisticsData(); + + if (s.equalsIgnoreCase("long")) { + LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); + statsData.setLongStats(longStats); + statsObj.setStatsData(statsData); + } else if (s.equalsIgnoreCase("double")) { + DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); + statsData.setDoubleStats(doubleStats); + statsObj.setStatsData(statsData); + } else if (s.equalsIgnoreCase("string")) { + StringColumnStatsDataInspector stringStats = new StringColumnStatsDataInspector(); + statsData.setStringStats(stringStats); + statsObj.setStatsData(statsData); + } else if (s.equalsIgnoreCase("boolean")) { + BooleanColumnStatsData booleanStats = new BooleanColumnStatsData(); + statsData.setBooleanStats(booleanStats); + statsObj.setStatsData(statsData); + } else if (s.equalsIgnoreCase("binary")) { + BinaryColumnStatsData binaryStats = new BinaryColumnStatsData(); + statsData.setBinaryStats(binaryStats); + statsObj.setStatsData(statsData); + } else if (s.equalsIgnoreCase("decimal")) { + DecimalColumnStatsDataInspector decimalStats = new DecimalColumnStatsDataInspector(); + statsData.setDecimalStats(decimalStats); + statsObj.setStatsData(statsData); + } else if (s.equalsIgnoreCase("date")) { + DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); + statsData.setDateStats(dateStats); + statsObj.setStatsData(statsData); + } + } else { + // invoke the right unpack method depending on data type of the column + if (statsObj.getStatsData().isSetBooleanStats()) { + unpackBooleanStats(oi, o, fieldName, statsObj); + } else if (statsObj.getStatsData().isSetLongStats()) { + unpackLongStats(oi, o, fieldName, statsObj); + } else if (statsObj.getStatsData().isSetDoubleStats()) { + unpackDoubleStats(oi, o, fieldName, statsObj); + } else if (statsObj.getStatsData().isSetStringStats()) { + unpackStringStats(oi, o, fieldName, statsObj); + } else if (statsObj.getStatsData().isSetBinaryStats()) { + unpackBinaryStats(oi, o, fieldName, statsObj); + } else if (statsObj.getStatsData().isSetDecimalStats()) { + unpackDecimalStats(oi, o, fieldName, statsObj); + } else if (statsObj.getStatsData().isSetDateStats()) { + unpackDateStats(oi, o, fieldName, statsObj); } + } + } - List partitions = getPartitionsList(db); - boolean atomic = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_ATOMIC); - - String tableFullName = table.getDbName() + "." 
+ table.getTableName(); - - if (partitions == null) { - org.apache.hadoop.hive.metastore.api.Table tTable = table.getTTable(); - Map parameters = tTable.getParameters(); - // In the following scenarios, we need to reset the stats to true. - // work.getTableSpecs() != null means analyze command - // work.getLoadTableDesc().getReplace() is true means insert overwrite command - // work.getLoadFileDesc().getDestinationCreateTable().isEmpty() means CTAS etc. - // acidTable will not have accurate stats unless it is set through analyze command. - if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) { - StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); - } else if (work.getTableSpecs() != null - || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) - || (work.getLoadFileDesc() != null && !work.getLoadFileDesc() - .getDestinationCreateTable().isEmpty())) { - StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE); - } - // non-partitioned tables: - if (!existStats(parameters) && atomic) { - return 0; - } + private void unpackStructObject(ObjectInspector oi, Object o, String fName, + ColumnStatisticsObj cStatsObj) throws UnsupportedDoubleException { + if (oi.getCategory() != ObjectInspector.Category.STRUCT) { + throw new RuntimeException("Invalid object datatype : " + oi.getCategory().toString()); + } - // The collectable stats for the aggregator needs to be cleared. - // For eg. if a file is being loaded, the old number of rows are not valid - if (work.isClearAggregatorStats()) { - // we choose to keep the invalid stats and only change the setting. - StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); - } + StructObjectInspector soi = (StructObjectInspector) oi; + List fields = soi.getAllStructFieldRefs(); + List list = soi.getStructFieldsDataAsList(o); - updateQuickStats(wh, parameters, tTable.getSd()); - if (StatsSetupConst.areBasicStatsUptoDate(parameters)) { - if (statsAggregator != null) { - String prefix = getAggregationPrefix(table, null); - updateStats(statsAggregator, parameters, prefix, atomic); - } - // write table stats to metastore - if (!getWork().getNoStatsAggregator()) { - environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, - StatsSetupConst.TASK); - } - } + for (int i = 0; i < fields.size(); i++) { + // Get the field objectInspector, fieldName and the field object. + ObjectInspector foi = fields.get(i).getFieldObjectInspector(); + Object f = (list == null ? null : list.get(i)); + String fieldName = fields.get(i).getFieldName(); - getHive().alterTable(tableFullName, new Table(tTable), environmentContext); - if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { - console.printInfo("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); - } - LOG.info("Table " + tableFullName + " stats: [" + toString(parameters) + ']'); + if (foi.getCategory() == ObjectInspector.Category.PRIMITIVE) { + unpackPrimitiveObject(foi, f, fieldName, cStatsObj); } else { - // Partitioned table: - // Need to get the old stats of the partition - // and update the table stats based on the old and new stats. - List updates = new ArrayList(); - - //Get the file status up-front for all partitions. Beneficial in cases of blob storage systems - final Map fileStatusMap = new ConcurrentHashMap(); - int poolSize = conf.getInt(ConfVars.HIVE_MOVE_FILES_THREAD_COUNT.varname, 1); - // In case thread count is set to 0, use single thread. 
- poolSize = Math.max(poolSize, 1); - final ExecutorService pool = Executors.newFixedThreadPool(poolSize, - new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("stats-updater-thread-%d") - .build()); - final List> futures = Lists.newLinkedList(); - LOG.debug("Getting file stats of all partitions. threadpool size:" + poolSize); - try { - for(final Partition partn : partitions) { - final String partitionName = partn.getName(); - final org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition(); - Map parameters = tPart.getParameters(); - - if (!existStats(parameters) && atomic) { - continue; - } - futures.add(pool.submit(new Callable() { - @Override - public Void call() throws Exception { - FileStatus[] partfileStatus = wh.getFileStatusesForSD(tPart.getSd()); - fileStatusMap.put(partitionName, partfileStatus); - return null; - } - })); - } - pool.shutdown(); - for(Future future : futures) { - future.get(); - } - } catch (InterruptedException e) { - LOG.debug("Cancelling " + futures.size() + " file stats lookup tasks"); - //cancel other futures - for (Future future : futures) { - future.cancel(true); - } - // Fail the query if the stats are supposed to be reliable - if (work.isStatsReliable()) { - ret = 1; - } - } finally { - if (pool != null) { - pool.shutdownNow(); - } - LOG.debug("Finished getting file stats of all partitions"); - } - - for (Partition partn : partitions) { - // - // get the old partition stats - // - org.apache.hadoop.hive.metastore.api.Partition tPart = partn.getTPartition(); - Map parameters = tPart.getParameters(); - if (work.getTableSpecs() == null && AcidUtils.isAcidTable(table)) { - StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); - } else if (work.getTableSpecs() != null - || (work.getLoadTableDesc() != null && work.getLoadTableDesc().getReplace()) - || (work.getLoadFileDesc() != null && !work.getLoadFileDesc() - .getDestinationCreateTable().isEmpty())) { - StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.TRUE); - } - //only when the stats exist, it is added to fileStatusMap - if (!fileStatusMap.containsKey(partn.getName())) { - continue; - } - - // The collectable stats for the aggregator needs to be cleared. - // For eg. if a file is being loaded, the old number of rows are not valid - if (work.isClearAggregatorStats()) { - // we choose to keep the invalid stats and only change the setting. 
- StatsSetupConst.setBasicStatsState(parameters, StatsSetupConst.FALSE); - } - - updateQuickStats(parameters, fileStatusMap.get(partn.getName())); - if (StatsSetupConst.areBasicStatsUptoDate(parameters)) { - if (statsAggregator != null) { - String prefix = getAggregationPrefix(table, partn); - updateStats(statsAggregator, parameters, prefix, atomic); - } - if (!getWork().getNoStatsAggregator()) { - environmentContext = new EnvironmentContext(); - environmentContext.putToProperties(StatsSetupConst.STATS_GENERATED, - StatsSetupConst.TASK); - } - } - updates.add(new Partition(table, tPart)); - - if (conf.getBoolVar(ConfVars.TEZ_EXEC_SUMMARY)) { - console.printInfo("Partition " + tableFullName + partn.getSpec() + - " stats: [" + toString(parameters) + ']'); - } - LOG.info("Partition " + tableFullName + partn.getSpec() + - " stats: [" + toString(parameters) + ']'); - } - if (!updates.isEmpty()) { - db.alterPartitions(tableFullName, updates, environmentContext); - } + unpackStructObject(foi, f, fieldName, cStatsObj); } + } + } - } catch (Exception e) { - console.printInfo("[Warning] could not update stats.", - "Failed with exception " + e.getMessage() + "\n" - + StringUtils.stringifyException(e)); + private List constructColumnStatsFromPackedRows(Hive db) throws HiveException, + MetaException, IOException { + + String currentDb = SessionState.get().getCurrentDatabase(); + String tableName = work.getColStats().getTableName(); + String partName = null; + List colName = work.getColStats().getColName(); + List colType = work.getColStats().getColType(); + boolean isTblLevel = work.getColStats().isTblLevel(); + + List stats = new ArrayList(); + InspectableObject packedRow; + Table tbl = db.getTable(currentDb, tableName); + while ((packedRow = ftOp.getNextRow()) != null) { + if (packedRow.oi.getCategory() != ObjectInspector.Category.STRUCT) { + throw new HiveException("Unexpected object type encountered while unpacking row"); + } - // Fail the query if the stats are supposed to be reliable - if (work.isStatsReliable()) { - ret = 1; + List statsObjs = new ArrayList(); + StructObjectInspector soi = (StructObjectInspector) packedRow.oi; + List fields = soi.getAllStructFieldRefs(); + List list = soi.getStructFieldsDataAsList(packedRow.o); + + List partColSchema = tbl.getPartCols(); + // Partition columns are appended at end, we only care about stats column + int numOfStatCols = isTblLevel ? fields.size() : fields.size() - partColSchema.size(); + for (int i = 0; i < numOfStatCols; i++) { + // Get the field objectInspector, fieldName and the field object. + ObjectInspector foi = fields.get(i).getFieldObjectInspector(); + Object f = (list == null ? null : list.get(i)); + String fieldName = fields.get(i).getFieldName(); + ColumnStatisticsObj statsObj = new ColumnStatisticsObj(); + statsObj.setColName(colName.get(i)); + statsObj.setColType(colType.get(i)); + try { + unpackStructObject(foi, f, fieldName, statsObj); + statsObjs.add(statsObj); + } catch (UnsupportedDoubleException e) { + // due to infinity or nan. 
+ LOG.info("Skipping stats for column " + colName.get(i) + " because it contains Infinity or NaN."); + } } - } finally { - if (statsAggregator != null) { - statsAggregator.closeConnection(scc); + + if (!isTblLevel) { + List<String> partVals = new ArrayList<String>(); + // Iterate over partition columns to figure out partition name + for (int i = fields.size() - partColSchema.size(); i < fields.size(); i++) { + Object partVal = ((PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector()) + .getPrimitiveJavaObject(list.get(i)); + partVals.add(partVal == null ? // could be null for default partition + this.conf.getVar(ConfVars.DEFAULTPARTITIONNAME) + : partVal.toString()); + } + partName = Warehouse.makePartName(partColSchema, partVals); + } + String[] names = Utilities.getDbTableName(currentDb, tableName); + ColumnStatisticsDesc statsDesc = getColumnStatsDesc(names[0], names[1], partName, isTblLevel); + ColumnStatistics colStats = new ColumnStatistics(); + colStats.setStatsDesc(statsDesc); + colStats.setStatsObj(statsObjs); + if (!colStats.getStatsObj().isEmpty()) { + stats.add(colStats); } } - // The return value of 0 indicates success, - // anything else indicates failure - return ret; + ftOp.clearFetchContext(); + return stats; } - private String getAggregationPrefix(Table table, Partition partition) - throws MetaException { - - // prefix is of the form dbName.tblName - String prefix = table.getDbName() + "." + MetaStoreUtils.encodeTableName(table.getTableName()); - if (partition != null) { - return Utilities.join(prefix, Warehouse.makePartPath(partition.getSpec())); + private ColumnStatisticsDesc getColumnStatsDesc(String dbName, String tableName, String partName, + boolean isTblLevel) { + ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(); + statsDesc.setDbName(dbName); + statsDesc.setTableName(tableName); + statsDesc.setIsTblLevel(isTblLevel); + + if (!isTblLevel) { + statsDesc.setPartName(partName); + } else { + statsDesc.setPartName(null); } - return prefix; + return statsDesc; } - private StatsAggregator createStatsAggregator(StatsCollectionContext scc, HiveConf conf) throws HiveException { - String statsImpl = HiveConf.getVar(conf, HiveConf.ConfVars.HIVESTATSDBCLASS); - StatsFactory factory = StatsFactory.newFactory(statsImpl, conf); - if (factory == null) { - throw new HiveException(ErrorMsg.STATSPUBLISHER_NOT_OBTAINED.getErrorCodedMsg()); - } - // initialize stats publishing table for noscan which has only stats task - // the rest of MR task following stats task initializes it in ExecDriver.java - StatsPublisher statsPublisher = factory.getStatsPublisher(); - if (!statsPublisher.init(scc)) { // creating stats table if not exists - throw new HiveException(ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg()); - } - - // manufacture a StatsAggregator - StatsAggregator statsAggregator = factory.getStatsAggregator(); - if (!statsAggregator.connect(scc)) { - throw new HiveException(ErrorMsg.STATSAGGREGATOR_CONNECTION_ERROR.getErrorCodedMsg(statsImpl)); + private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException { + // Construct a column statistics object from the result + List<ColumnStatistics> colStats = constructColumnStatsFromPackedRows(db); + // Persist the column statistics object to the metastore + // Note, this function is shared for both table and partition column stats.
+ if (colStats.isEmpty()) { + return 0; } - return statsAggregator; + SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats); + request.setNeedMerge(work.getColStats().isNeedMerge()); + db.setPartitionColumnStatistics(request); + return 0; } - private StatsCollectionContext getContext() throws HiveException { - - StatsCollectionContext scc = new StatsCollectionContext(conf); - Task sourceTask = getWork().getSourceTask(); - if (sourceTask == null) { - throw new HiveException(ErrorMsg.STATSAGGREGATOR_SOURCETASK_NULL.getErrorCodedMsg()); + @Override + public int execute(DriverContext driverContext) { + if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) { + return 0; } - scc.setTask(sourceTask); - scc.setStatsTmpDir(this.getWork().getStatsTmpDir()); - return scc; - } - - private boolean existStats(Map parameters) { - return parameters.containsKey(StatsSetupConst.ROW_COUNT) - || parameters.containsKey(StatsSetupConst.NUM_FILES) - || parameters.containsKey(StatsSetupConst.TOTAL_SIZE) - || parameters.containsKey(StatsSetupConst.RAW_DATA_SIZE) - || parameters.containsKey(StatsSetupConst.NUM_PARTITIONS); - } - private void updateStats(StatsAggregator statsAggregator, - Map parameters, String prefix, boolean atomic) - throws HiveException { - - String aggKey = prefix.endsWith(Path.SEPARATOR) ? prefix : prefix + Path.SEPARATOR; - - for (String statType : StatsSetupConst.statsRequireCompute) { - String value = statsAggregator.aggregateStats(aggKey, statType); - if (value != null && !value.isEmpty()) { - long longValue = Long.parseLong(value); + // TODO: merge BasicStatsWork and BasicStatsNoJobWork + if (work.getBasicStatsWork() != null && work.getBasicStatsNoJobWork() != null) { + LOG.error("Cannot have both basic stats work and basic stats no-job work!"); + return 1; + } + int ret = 0; + if (work.getBasicStatsWork() != null) { + work.getBasicStatsWork().setFollowedByColStats(work.getfWork() != null); + Task basicStatsTask = TaskFactory.get(work.getBasicStatsWork(), conf); + basicStatsTask.initialize(queryState, queryPlan, driverContext, null); + ((BasicStatsTask) basicStatsTask).setDpPartSpecs(dpPartSpecs); + ret = ((BasicStatsTask) basicStatsTask).execute(driverContext); + } + if (work.getBasicStatsNoJobWork() != null) { + Task basicStatsTask = TaskFactory.get(work.getBasicStatsNoJobWork(), + conf); + basicStatsTask.initialize(queryState, queryPlan, driverContext, null); + ret = ((BasicStatsNoJobTask) basicStatsTask).execute(driverContext); + } + if (ret != 0) { + return ret; + } - if (work.getLoadTableDesc() != null && - !work.getLoadTableDesc().getReplace()) { - String originalValue = parameters.get(statType); - if (originalValue != null) { - longValue += Long.parseLong(originalValue); // todo: invalid + valid = invalid - } - } - parameters.put(statType, String.valueOf(longValue)); - } else { - if (atomic) { - throw new HiveException(ErrorMsg.STATSAGGREGATOR_MISSED_SOMESTATS, statType); - } + if (work.getfWork() != null) { + try { + Hive db = getHive(); + return persistColumnStats(db); + } catch (Exception e) { + LOG.error("Failed to run column stats task", e); + return 1; } } + return 0; } - private void updateQuickStats(Warehouse wh, Map parameters, - StorageDescriptor desc) throws MetaException { - /** - * calculate fast statistics - */ - FileStatus[] partfileStatus = wh.getFileStatusesForSD(desc); - updateQuickStats(parameters, partfileStatus); - } - - private void updateQuickStats(Map parameters, - FileStatus[] partfileStatus) throws MetaException { -
MetaStoreUtils.populateQuickStats(partfileStatus, parameters); + @Override + public StageType getType() { + return StageType.COLUMNSTATS; } - private String toString(Map parameters) { - StringBuilder builder = new StringBuilder(); - for (String statType : StatsSetupConst.supportedStats) { - String value = parameters.get(statType); - if (value != null) { - if (builder.length() > 0) { - builder.append(", "); - } - builder.append(statType).append('=').append(value); - } - } - return builder.toString(); + @Override + public String getName() { + return "COLUMNSTATS TASK"; } - /** - * Get the list of partitions that need to update statistics. - * TODO: we should reuse the Partitions generated at compile time - * since getting the list of partitions is quite expensive. - * - * @return a list of partitions that need to update statistics. - * @throws HiveException - */ - private List getPartitionsList(Hive db) throws HiveException { - if (work.getLoadFileDesc() != null) { - return null; //we are in CTAS, so we know there are no partitions - } - - List list = new ArrayList(); - - if (work.getTableSpecs() != null) { - - // ANALYZE command - TableSpec tblSpec = work.getTableSpecs(); - table = tblSpec.tableHandle; - if (!table.isPartitioned()) { - return null; - } - // get all partitions that match the partition spec - List partitions = tblSpec.partitions; - if (partitions != null) { - for (Partition partn : partitions) { - list.add(partn); - } - } - } else if (work.getLoadTableDesc() != null) { + private Collection dpPartSpecs; - // INSERT OVERWRITE command - LoadTableDesc tbd = work.getLoadTableDesc(); - table = db.getTable(tbd.getTable().getTableName()); - if (!table.isPartitioned()) { - return null; - } - DynamicPartitionCtx dpCtx = tbd.getDPCtx(); - if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions - // If no dynamic partitions are generated, dpPartSpecs may not be initialized - if (dpPartSpecs != null) { - // load the list of DP partitions and return the list of partition specs - list.addAll(dpPartSpecs); - } - } else { // static partition - Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false); - list.add(partn); - } + @Override + protected void receiveFeed(FeedType feedType, Object feedValue) { + // This method is called by MoveTask when dynamic + // partitions have been generated. + if (feedType == FeedType.DYNAMIC_PARTITIONS) { + dpPartSpecs = (Collection) feedValue; } - return list; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java index fe9b6244df..abfdd867b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanTask; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork; import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; -import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; +import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.ConditionalWork; import org.apache.hadoop.hive.ql.plan.CopyWork; import org.apache.hadoop.hive.ql.plan.DDLWork; @@ -54,8 +54,8 @@ import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.ReplCopyWork; import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import
org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.TezWork; /** @@ -99,10 +99,9 @@ public TaskTuple(Class workClass, Class> taskClass) { taskvec.add(new TaskTuple(MapredLocalWork.class, MapredLocalTask.class)); - taskvec.add(new TaskTuple(StatsWork.class, - StatsTask.class)); - taskvec.add(new TaskTuple(StatsNoJobWork.class, StatsNoJobTask.class)); - taskvec.add(new TaskTuple(ColumnStatsWork.class, ColumnStatsTask.class)); + taskvec.add(new TaskTuple(BasicStatsWork.class, BasicStatsTask.class)); + taskvec.add(new TaskTuple(BasicStatsNoJobWork.class, BasicStatsNoJobTask.class)); + taskvec.add(new TaskTuple(StatsWork.class, StatsTask.class)); taskvec.add(new TaskTuple(ColumnStatsUpdateWork.class, ColumnStatsUpdateTask.class)); taskvec.add(new TaskTuple(MergeFileWork.class, MergeFileTask.class)); diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index aa44c62d88..54a7701fa3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1742,7 +1742,9 @@ public Partition loadPartition(Path loadPath, Table tbl, } // column stats will be inaccurate - StatsSetupConst.clearColumnStatsState(newTPart.getParameters()); + if (!hasFollowingStatsTask) { + StatsSetupConst.clearColumnStatsState(newTPart.getParameters()); + } // recreate the partition if it existed before if (isSkewedStoreAsSubdir) { @@ -1761,8 +1763,8 @@ public Partition loadPartition(Path loadPath, Table tbl, if (oldPart == null) { newTPart.getTPartition().setParameters(new HashMap()); if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - StatsSetupConst.setStatsStateForCreateTable(newTPart.getParameters(), null, - StatsSetupConst.TRUE); + StatsSetupConst.setStatsStateForCreateTable(newTPart.getParameters(), + MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE); } MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters()); try { @@ -2124,7 +2126,9 @@ public void loadTable(Path loadPath, String tableName, boolean replace, boolean } //column stats will be inaccurate - StatsSetupConst.clearColumnStatsState(tbl.getParameters()); + if (!hasFollowingStatsTask) { + StatsSetupConst.clearColumnStatsState(tbl.getParameters()); + } try { if (isSkewedStoreAsSubdir) { diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index 61f6a7c4ff..e49c5ae3d5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -434,6 +434,8 @@ private void createTempTable(org.apache.hadoop.hive.metastore.api.Table tbl, // Add temp table info to current session Table tTable = new Table(tbl); + StatsSetupConst.setStatsStateForCreateTable(tbl.getParameters(), + MetaStoreUtils.getColumnNamesForTable(tbl), StatsSetupConst.TRUE); if (tables == null) { tables = new HashMap(); ss.getTempTables().put(dbName, tables); @@ -711,6 +713,13 @@ private boolean updateTempTableColumnStats(String dbName, String tableName, ssTableColStats); } mergeColumnStats(ssTableColStats, colStats); + + List colNames = new ArrayList<>(); + for (ColumnStatisticsObj obj : colStats.getStatsObj()) { + 
colNames.add(obj.getColName()); + } + org.apache.hadoop.hive.metastore.api.Table table = getTempTable(dbName, tableName); + StatsSetupConst.setColumnStatsState(table.getParameters(), colNames); return true; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java index 9297a0b874..9f223af877 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.DriverContext; +import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; @@ -45,10 +46,11 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.mapred.InputFormat; /** @@ -99,7 +101,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // There will not be any MR or Tez job above this task - StatsNoJobWork snjWork = new StatsNoJobWork(op.getConf().getTableMetadata().getTableSpec()); + BasicStatsNoJobWork snjWork = new BasicStatsNoJobWork(op.getConf().getTableMetadata().getTableSpec()); snjWork.setStatsReliable(parseCtx.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); // If partition is specified, get pruned partition list @@ -111,7 +113,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, partCols, false); snjWork.setPrunedPartitionList(partList); } - Task snjTask = TaskFactory.get(snjWork, parseCtx.getConf()); + Task snjTask = TaskFactory.get(snjWork, parseCtx.getConf()); ctx.setCurrTask(snjTask); ctx.setCurrTopOp(null); ctx.getRootTasks().clear(); @@ -121,14 +123,15 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // The plan consists of a simple MapRedTask followed by a StatsTask. 
// The MR task is just a simple TableScanOperator - StatsWork statsWork = new StatsWork(op.getConf().getTableMetadata().getTableSpec()); + BasicStatsWork statsWork = new BasicStatsWork(op.getConf().getTableMetadata().getTableSpec()); statsWork.setAggKey(op.getConf().getStatsAggPrefix()); statsWork.setStatsTmpDir(op.getConf().getTmpStatsDir()); statsWork.setSourceTask(currTask); statsWork.setStatsReliable(parseCtx.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task statsTask = TaskFactory.get(statsWork, parseCtx.getConf()); - currTask.addDependentTask(statsTask); + StatsWork columnStatsWork = new StatsWork(statsWork); + Task columnStatsTask = TaskFactory.get(columnStatsWork, parseCtx.getConf()); + currTask.addDependentTask(columnStatsTask); if (!ctx.getRootTasks().contains(currTask)) { ctx.getRootTasks().add(currTask); } @@ -136,15 +139,15 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // The plan consists of a StatsTask only. if (noScan) { - statsTask.setParentTasks(null); + columnStatsTask.setParentTasks(null); statsWork.setNoScanAnalyzeCommand(true); ctx.getRootTasks().remove(currTask); - ctx.getRootTasks().add(statsTask); + ctx.getRootTasks().add(columnStatsTask); } // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; if (partialScan) { - handlePartialScanCommand(op, ctx, parseCtx, currTask, statsWork, statsTask); + handlePartialScanCommand(op, ctx, parseCtx, currTask, statsWork, columnStatsTask); } currWork.getMapWork().setGatheringStats(true); @@ -188,7 +191,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx opProcCtx, */ private void handlePartialScanCommand(TableScanOperator op, GenMRProcContext ctx, ParseContext parseCtx, Task currTask, - StatsWork statsWork, Task statsTask) throws SemanticException { + BasicStatsWork statsWork, Task statsTask) throws SemanticException { String aggregationKey = op.getConf().getStatsAggPrefix(); StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey); List inputPaths = GenMapRedUtils.getInputPathsForPartialScan(op, aggregationKeyBuffer); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index da153e36d2..0286f1343a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -35,6 +35,7 @@ import java.util.Set; import com.google.common.annotations.VisibleForTesting; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.BlobStorageUtils; @@ -88,6 +89,7 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles; import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles.ConditionalResolverMergeFilesCtx; import org.apache.hadoop.hive.ql.plan.ConditionalWork; @@ -111,7 +113,7 @@ import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import 
org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.TezWork; @@ -498,6 +500,10 @@ public static void setMapWork(MapWork plan, ParseContext parseCtx, Set currTask, HiveConf hconf) { MoveWork mvWork = mvTask.getWork(); - StatsWork statsWork = null; + BasicStatsWork statsWork = null; if (mvWork.getLoadTableWork() != null) { - statsWork = new StatsWork(mvWork.getLoadTableWork()); + statsWork = new BasicStatsWork(mvWork.getLoadTableWork()); } else if (mvWork.getLoadFileWork() != null) { - statsWork = new StatsWork(mvWork.getLoadFileWork()); + statsWork = new BasicStatsWork(mvWork.getLoadFileWork()); } assert statsWork != null : "Error when generating StatsTask"; @@ -1506,7 +1512,8 @@ public static void addStatsTask(FileSinkOperator nd, MoveTask mvTask, // AggKey in StatsWork is used for stats aggregation while StatsAggPrefix // in FileSinkDesc is used for stats publishing. They should be consistent. statsWork.setAggKey(nd.getConf().getStatsAggPrefix()); - Task statsTask = TaskFactory.get(statsWork, hconf); + StatsWork columnStatsWork = new StatsWork(statsWork); + Task statsTask = TaskFactory.get(columnStatsWork, hconf); // subscribe feeds from the MoveTask so that MoveTask can forward the list // of dynamic partition list to the StatsTask diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java index 3a20cfe7ac..eae9505709 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MemoryDecider.java @@ -35,10 +35,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.BasicStatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.tez.DagUtils; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -92,8 +93,9 @@ public MemoryCalculator(PhysicalContext pctx) { public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) throws SemanticException { Task currTask = (Task) nd; - if (currTask instanceof StatsTask) { - currTask = ((StatsTask) currTask).getWork().getSourceTask(); + if (currTask instanceof StatsTask + && ((StatsTask) currTask).getWork().getBasicStatsWork() != null) { + currTask = ((StatsTask) currTask).getWork().getBasicStatsWork().getSourceTask(); } if (currTask instanceof TezTask) { TezWork work = ((TezTask) currTask).getWork(); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SerializeFilter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SerializeFilter.java index dc433fed22..f3c6daedfc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SerializeFilter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SerializeFilter.java @@ -26,8 +26,9 @@ import java.util.Set; import java.util.Stack; -import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.SerializationUtilities; +import org.apache.hadoop.hive.ql.exec.BasicStatsTask; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.tez.TezTask; @@ -71,8 +72,9 @@ public Serializer(PhysicalContext pctx) { public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) throws SemanticException { Task currTask = (Task) nd; - if (currTask instanceof StatsTask) { - currTask = ((StatsTask) currTask).getWork().getSourceTask(); + if (currTask instanceof StatsTask + && ((StatsTask) currTask).getWork().getBasicStatsWork() != null) { + currTask = ((StatsTask) currTask).getWork().getBasicStatsWork().getSourceTask(); } if (currTask instanceof TezTask) { TezWork work = ((TezTask) currTask).getWork(); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 251decac9b..9e2015221e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -91,6 +91,7 @@ import org.apache.hadoop.hive.ql.plan.CacheMetadataDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; +import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc; import org.apache.hadoop.hive.ql.plan.CreateIndexDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; @@ -130,7 +131,7 @@ import org.apache.hadoop.hive.ql.plan.ShowTablesDesc; import org.apache.hadoop.hive.ql.plan.ShowTblPropertiesDesc; import org.apache.hadoop.hive.ql.plan.ShowTxnsDesc; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.SwitchDatabaseDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TruncateTableDesc; @@ -1091,18 +1092,19 @@ private void analyzeTruncateTable(ASTNode ast) throws SemanticException { // Recalculate the HDFS stats if auto gather stats is set if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - StatsWork statDesc; + BasicStatsWork basicStatsWork; if (oldTblPartLoc.equals(newTblPartLoc)) { // If we're merging to the same location, we can avoid some metastore calls TableSpec tablepart = new TableSpec(this.db, conf, root); - statDesc = new StatsWork(tablepart); + basicStatsWork = new BasicStatsWork(tablepart); } else { - 
statDesc = new StatsWork(ltd); + basicStatsWork = new BasicStatsWork(ltd); } - statDesc.setNoStatsAggregator(true); - statDesc.setClearAggregatorStats(true); - statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task statTask = TaskFactory.get(statDesc, conf); + basicStatsWork.setNoStatsAggregator(true); + basicStatsWork.setClearAggregatorStats(true); + basicStatsWork.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + StatsWork columnStatsWork = new StatsWork(basicStatsWork); + Task statTask = TaskFactory.get(columnStatsWork, conf); moveTsk.addDependentTask(statTask); } } catch (HiveException e) { @@ -1724,18 +1726,19 @@ private void analyzeAlterTablePartMergeFiles(ASTNode ast, mergeTask.addDependentTask(moveTsk); if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - StatsWork statDesc; + BasicStatsWork basicStatsWork; if (oldTblPartLoc.equals(newTblPartLoc)) { // If we're merging to the same location, we can avoid some metastore calls TableSpec tableSpec = new TableSpec(db, tableName, partSpec); - statDesc = new StatsWork(tableSpec); + basicStatsWork = new BasicStatsWork(tableSpec); } else { - statDesc = new StatsWork(ltd); + basicStatsWork = new BasicStatsWork(ltd); } - statDesc.setNoStatsAggregator(true); - statDesc.setClearAggregatorStats(true); - statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task statTask = TaskFactory.get(statDesc, conf); + basicStatsWork.setNoStatsAggregator(true); + basicStatsWork.setClearAggregatorStats(true); + basicStatsWork.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + StatsWork columnStatsWork = new StatsWork(basicStatsWork); + Task statTask = TaskFactory.get(columnStatsWork, conf); moveTsk.addDependentTask(statTask); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 7a0d4a752e..fbef0dd837 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.ExplainTask; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.BasicStatsTask; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index 1b0a2f0661..7f2d4f59cc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -204,10 +204,6 @@ protected void setupMapWork(MapWork mapWork, GenTezProcContext context, // All the setup is done in GenMapRedUtils GenMapRedUtils.setMapWork(mapWork, context.parseContext, context.inputs, partitions, root, alias, context.conf, false); - // we also collect table stats while collecting column stats. 
- if (context.parseContext.getAnalyzeRewrite() != null) { - mapWork.setGatheringStats(true); - } } // removes any union operator and clones the plan diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java index fa79700df7..6a7b3db17a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.parse; import org.apache.hadoop.hive.conf.HiveConf.StrictChecks; - import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import java.io.IOException; @@ -51,9 +50,10 @@ import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; import org.apache.hadoop.hive.ql.plan.MoveWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.mapred.InputFormat; import com.google.common.collect.Lists; @@ -301,11 +301,12 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { // Update the stats which do not require a complete scan. Task statTask = null; if (conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { - StatsWork statDesc = new StatsWork(loadTableWork); - statDesc.setNoStatsAggregator(true); - statDesc.setClearAggregatorStats(true); - statDesc.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - statTask = TaskFactory.get(statDesc, conf); + BasicStatsWork basicStatsWork = new BasicStatsWork(loadTableWork); + basicStatsWork.setNoStatsAggregator(true); + basicStatsWork.setClearAggregatorStats(true); + basicStatsWork.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + StatsWork columnStatsWork = new StatsWork(basicStatsWork); + statTask = TaskFactory.get(columnStatsWork, conf); } // HIVE-3334 has been filed for load file with index auto update diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java index b6d7ee8a92..abc7ed29a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java @@ -44,9 +44,10 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.mapred.InputFormat; @@ -103,7 +104,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // There will not be any Tez job above this task - StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata() + BasicStatsNoJobWork snjWork = new BasicStatsNoJobWork(tableScan.getConf().getTableMetadata() .getTableSpec()); snjWork.setStatsReliable(parseContext.getConf().getBoolVar( 
HiveConf.ConfVars.HIVE_STATS_RELIABLE)); @@ -116,7 +117,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, false); snjWork.setPrunedPartitionList(partList); } - Task snjTask = TaskFactory.get(snjWork, parseContext.getConf()); + Task snjTask = TaskFactory.get(snjWork, parseContext.getConf()); snjTask.setParentTasks(null); context.rootTasks.remove(context.currentTask); context.rootTasks.add(snjTask); @@ -127,27 +128,28 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, // The plan consists of a simple TezTask followed by a StatsTask. // The Tez task is just a simple TableScanOperator - StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); - statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); - statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); - statsWork.setSourceTask(context.currentTask); - statsWork.setStatsReliable(parseContext.getConf().getBoolVar( + BasicStatsWork basicStatsWork = new BasicStatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); + basicStatsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); + basicStatsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); + basicStatsWork.setSourceTask(context.currentTask); + basicStatsWork.setStatsReliable(parseContext.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task statsTask = TaskFactory.get(statsWork, parseContext.getConf()); + StatsWork columnStatsWork = new StatsWork(basicStatsWork); + Task statsTask = TaskFactory.get(columnStatsWork, parseContext.getConf()); context.currentTask.addDependentTask(statsTask); // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // The plan consists of a StatsTask only. if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { statsTask.setParentTasks(null); - statsWork.setNoScanAnalyzeCommand(true); + columnStatsWork.getBasicStatsWork().setNoScanAnalyzeCommand(true); context.rootTasks.remove(context.currentTask); context.rootTasks.add(statsTask); } // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) { - handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask); + handlePartialScanCommand(tableScan, parseContext, basicStatsWork, context, statsTask); } // NOTE: here we should use the new partition predicate pushdown API to @@ -166,65 +168,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procContext, return true; } - } else if (parseContext.getAnalyzeRewrite() != null) { - // we need to collect table stats while collecting column stats. 
- try { - context.currentTask.addDependentTask(genTableStats(context, tableScan)); - } catch (HiveException e) { - throw new SemanticException(e); - } } return null; } - private Task genTableStats(GenTezProcContext context, TableScanOperator tableScan) - throws HiveException { - Class inputFormat = tableScan.getConf().getTableMetadata() - .getInputFormatClass(); - ParseContext parseContext = context.parseContext; - Table table = tableScan.getConf().getTableMetadata(); - List partitions = new ArrayList<>(); - if (table.isPartitioned()) { - partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions()); - for (Partition partn : partitions) { - LOG.debug("XXX: adding part: " + partn); - context.outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK)); - } - } - TableSpec tableSpec = new TableSpec(table, partitions); - tableScan.getConf().getTableMetadata().setTableSpec(tableSpec); - - if (inputFormat.equals(OrcInputFormat.class)) { - // For ORC, there is no Tez Job for table stats. - StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata() - .getTableSpec()); - snjWork.setStatsReliable(parseContext.getConf().getBoolVar( - HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - // If partition is specified, get pruned partition list - if (partitions.size() > 0) { - snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan)); - } - return TaskFactory.get(snjWork, parseContext.getConf()); - } else { - - StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); - statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); - statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); - statsWork.setSourceTask(context.currentTask); - statsWork.setStatsReliable(parseContext.getConf().getBoolVar( - HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - return TaskFactory.get(statsWork, parseContext.getConf()); - } - } - /** * handle partial scan command. * * It is composed of PartialScanTask followed by StatsTask. 
*/ private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, - StatsWork statsWork, GenTezProcContext context, Task statsTask) + BasicStatsWork statsWork, GenTezProcContext context, Task statsTask) throws SemanticException { String aggregationKey = tableScan.getConf().getStatsAggPrefix(); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1c74779dec..de07ef24d5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -7330,7 +7330,8 @@ protected Operator genFileSinkPlan(String dest, QB qb, Operator input) // the following code is used to collect column stats when // hive.stats.autogather=true // and it is an insert overwrite or insert into table - if (dest_tab != null && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) + if (dest_tab != null && !dest_tab.isNonNative() + && conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER) && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER) && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) { if (dest_type.intValue() == QBMetaData.DEST_TABLE) { @@ -10474,10 +10475,7 @@ private void setupStats(TableScanDesc tsDesc, QBParseInfo qbp, Table tab, String throws SemanticException { // if it is not analyze command and not column stats, then do not gatherstats - // if it is column stats, but it is not tez, do not gatherstats - if ((!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) - || (qbp.getAnalyzeRewrite() != null && !HiveConf.getVar(conf, - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))) { + if (!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null) { tsDesc.setGatherStats(false); } else { if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java index 08a8f00e06..9ef2fe367d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java @@ -21,11 +21,14 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.Stack; @@ -35,15 +38,17 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.HiveStatsUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryState; -import org.apache.hadoop.hive.ql.exec.ColumnStatsTask; +import org.apache.hadoop.hive.ql.exec.StatsTask; import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.StatsTask; +import org.apache.hadoop.hive.ql.exec.BasicStatsTask; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -51,13 +56,19 
@@ import org.apache.hadoop.hive.ql.exec.spark.SparkTask; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils; import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc; -import org.apache.hadoop.hive.ql.plan.ColumnStatsWork; +import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; import org.apache.hadoop.hive.ql.plan.CreateViewDesc; import org.apache.hadoop.hive.ql.plan.DDLWork; @@ -76,6 +87,7 @@ import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.thrift.ThriftFormatter; import org.apache.hadoop.hive.serde2.thrift.ThriftJDBCBinarySerDe; +import org.apache.hadoop.mapred.InputFormat; import com.google.common.collect.Interner; import com.google.common.collect.Interners; @@ -294,18 +306,53 @@ public void compile(final ParseContext pCtx, final List> leafTasks = new LinkedHashSet>(); - getLeafTasks(rootTasks, leafTasks); + // map from tablename to task (ColumnStatsTask which includes a BasicStatsTask) + Map map = new LinkedHashMap<>(); if (isCStats) { - genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0); + if (rootTasks == null || rootTasks.size() != 1 || pCtx.getTopOps() == null + || pCtx.getTopOps().size() != 1) { + throw new SemanticException("Can not find correct root task!"); + } + try { + Task root = rootTasks.iterator().next(); + StatsTask tsk = (StatsTask) genTableStats(pCtx, pCtx.getTopOps().values() + .iterator().next(), root, outputs); + root.addDependentTask(tsk); + map.put(extractTableFullName((StatsTask) tsk), (StatsTask) tsk); + } catch (HiveException e) { + throw new SemanticException(e); + } + genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, map, outerQueryLimit, 0); } else { + Set> leafTasks = new LinkedHashSet>(); + getLeafTasks(rootTasks, leafTasks); + List> nonStatsLeafTasks = new ArrayList<>(); + for (Task tsk : leafTasks) { + // map table name to the correct ColumnStatsTask + if (tsk instanceof StatsTask) { + map.put(extractTableFullName((StatsTask) tsk), (StatsTask) tsk); + } else { + nonStatsLeafTasks.add(tsk); + } + } + // add cStatsTask as a dependent of all the nonStatsLeafTasks + for (Task tsk : nonStatsLeafTasks) { + for (Task cStatsTask : map.values()) { + tsk.addDependentTask(cStatsTask); + } + } for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx .getColumnStatsAutoGatherContexts()) { if (!columnStatsAutoGatherContext.isInsertInto()) { genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), - columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0); + columnStatsAutoGatherContext.getLoadFileWork(), map, outerQueryLimit, 0); } else { int numBitVector; try { @@ -314,7 +361,7 @@ public void compile(final ParseContext pCtx, final List genTableStats(ParseContext 
parseContext, TableScanOperator tableScan, Task currentTask, final HashSet outputs) + throws HiveException { + Class inputFormat = tableScan.getConf().getTableMetadata() + .getInputFormatClass(); + Table table = tableScan.getConf().getTableMetadata(); + List partitions = new ArrayList<>(); + if (table.isPartitioned()) { + partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions()); + for (Partition partn : partitions) { + LOG.debug("XXX: adding part: " + partn); + outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK)); + } + } + TableSpec tableSpec = new TableSpec(table, partitions); + tableScan.getConf().getTableMetadata().setTableSpec(tableSpec); + + if (inputFormat.equals(OrcInputFormat.class)) { + // For ORC, there is no Tez Job for table stats. + BasicStatsNoJobWork snjWork = new BasicStatsNoJobWork(tableScan.getConf().getTableMetadata() + .getTableSpec()); + snjWork.setStatsReliable(parseContext.getConf().getBoolVar( + HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + // If partition is specified, get pruned partition list + if (partitions.size() > 0) { + snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan)); + } + StatsWork columnStatsWork = new StatsWork(snjWork); + return TaskFactory.get(columnStatsWork, parseContext.getConf()); + } else { + BasicStatsWork statsWork = new BasicStatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); + statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); + statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); + statsWork.setSourceTask(currentTask); + statsWork.setStatsReliable(parseContext.getConf().getBoolVar( + HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + StatsWork columnStatsWork = new StatsWork(statsWork); + return TaskFactory.get(columnStatsWork, parseContext.getConf()); + } + } + private void patchUpAfterCTASorMaterializedView(final List> rootTasks, final HashSet outputs, Task createTask) { @@ -388,7 +496,8 @@ private void patchUpAfterCTASorMaterializedView(final List 0); for (Task task : leaves) { - if (task instanceof StatsTask) { + if (task instanceof StatsTask + && ((StatsTask) task).getWork().getBasicStatsWork() != null) { // StatsTask require table to already exist for (Task parentOfStatsTask : task.getParentTasks()) { parentOfStatsTask.addDependentTask(createTask); @@ -416,13 +525,12 @@ private void patchUpAfterCTASorMaterializedView(final List loadFileWork, Set> leafTasks, - int outerQueryLimit, int numBitVector) { - ColumnStatsTask cStatsTask = null; - ColumnStatsWork cStatsWork = null; + List loadFileWork, Map map, + int outerQueryLimit, int numBitVector) throws SemanticException { FetchWork fetch = null; String tableName = analyzeRewrite.getTableName(); List colName = analyzeRewrite.getColName(); @@ -450,10 +558,12 @@ protected void genColumnStatsTask(AnalyzeRewriteContext analyzeRewrite, ColumnStatsDesc cStatsDesc = new ColumnStatsDesc(tableName, colName, colType, isTblLevel, numBitVector); - cStatsWork = new ColumnStatsWork(fetch, cStatsDesc); - cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf); - for (Task tsk : leafTasks) { - tsk.addDependentTask(cStatsTask); + StatsTask columnStatsTask = map.get(tableName); + if (columnStatsTask == null) { + throw new SemanticException("Can not find " + tableName + " in genColumnStatsTask"); + } else { + columnStatsTask.getWork().setfWork(fetch); + columnStatsTask.getWork().setColStats(cStatsDesc); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java 
ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java index a2876e1d4f..74162c20f5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java @@ -42,10 +42,11 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.StatsWork; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.StatsNoJobWork; -import org.apache.hadoop.hive.ql.plan.StatsWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsNoJobWork; +import org.apache.hadoop.hive.ql.plan.BasicStatsWork; import org.apache.hadoop.mapred.InputFormat; import com.google.common.base.Preconditions; @@ -106,7 +107,7 @@ public Object process(Node nd, Stack<Node> stack, // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // There will not be any Spark job above this task -      StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec()); +      BasicStatsNoJobWork snjWork = new BasicStatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec()); snjWork.setStatsReliable(parseContext.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); // If partition is specified, get pruned partition list @@ -118,7 +119,7 @@ false); snjWork.setPrunedPartitionList(partList); } -      Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf()); +      Task<BasicStatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf()); snjTask.setParentTasks(null); context.rootTasks.remove(context.currentTask); context.rootTasks.add(snjTask); @@ -129,26 +130,27 @@ // The plan consists of a simple SparkTask followed by a StatsTask. // The Spark task is just a simple TableScanOperator -      StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); -      statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); -      statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); -      statsWork.setSourceTask(context.currentTask); -      statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); -      Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf()); +      BasicStatsWork basicStatsWork = new BasicStatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); +      basicStatsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); +      basicStatsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); +      basicStatsWork.setSourceTask(context.currentTask); +      basicStatsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); +      StatsWork columnStatsWork = new StatsWork(basicStatsWork); +      Task<StatsWork> statsTask = TaskFactory.get(columnStatsWork, parseContext.getConf()); context.currentTask.addDependentTask(statsTask); // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // The plan consists of a StatsTask only.
if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { statsTask.setParentTasks(null); -        statsWork.setNoScanAnalyzeCommand(true); +        basicStatsWork.setNoScanAnalyzeCommand(true); context.rootTasks.remove(context.currentTask); context.rootTasks.add(statsTask); } // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) { -        handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask); +        handlePartialScanCommand(tableScan, parseContext, basicStatsWork, context, statsTask); } // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list, @@ -176,7 +178,7 @@ public Object process(Node nd, Stack<Node> stack, * It is composed of PartialScanTask followed by StatsTask. */ private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext, -      StatsWork statsWork, GenSparkProcContext context, Task<StatsWork> statsTask) +      BasicStatsWork statsWork, GenSparkProcContext context, Task<StatsWork> statsTask) throws SemanticException { String aggregationKey = tableScan.getConf().getStatsAggPrefix(); StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsNoJobWork.java similarity index 83% rename from ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java rename to ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsNoJobWork.java index 77c04f6c6e..db33ac400d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsNoJobWork.java @@ -22,29 +22,27 @@ import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; -import org.apache.hadoop.hive.ql.plan.Explain.Level; /** * Client-side stats aggregator task. */ -@Explain(displayName = "Stats-Aggr Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) -public class StatsNoJobWork implements Serializable { +public class BasicStatsNoJobWork implements Serializable { private static final long serialVersionUID = 1L; private TableSpec tableSpecs; private boolean statsReliable; private PrunedPartitionList prunedPartitionList; -  public StatsNoJobWork() { +  public BasicStatsNoJobWork() { } -  public StatsNoJobWork(TableSpec tableSpecs) { +  public BasicStatsNoJobWork(TableSpec tableSpecs) { this.tableSpecs = tableSpecs; } -  public StatsNoJobWork(boolean statsReliable) { +  public BasicStatsNoJobWork(boolean statsReliable) { this.statsReliable = statsReliable; } @@ -67,4 +65,5 @@ public void setPrunedPartitionList(PrunedPartitionList prunedPartitionList) { public PrunedPartitionList getPrunedPartitionList() { return prunedPartitionList; } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java new file mode 100644 index 0000000000..24dbff119a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/plan/BasicStatsWork.java @@ -0,0 +1,180 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + *     http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; + +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; +import org.apache.hadoop.hive.ql.plan.Explain.Level; + + +/** + * Basic statistics work: table/partition-level stats such as row count and data size. + * + */ +public class BasicStatsWork implements Serializable { +  private static final long serialVersionUID = 1L; + +  private TableSpec tableSpecs;               // source table spec -- for TableScanOperator +  private LoadTableDesc loadTableDesc;        // same as MoveWork.loadTableDesc -- for FileSinkOperator +  private LoadFileDesc loadFileDesc;          // same as MoveWork.loadFileDesc -- for FileSinkOperator +  private String aggKey;                      // aggregation key prefix +  private boolean statsReliable;              // are stats completely reliable + +  // If stats aggregator is not present, clear the current aggregator stats. +  // E.g., if a merge is being performed, stats already collected by aggregator (numrows etc.) +  // are still valid. However, if a load file is being performed, the old stats collected by +  // aggregator are not valid. It might be a good idea to clear them instead of leaving wrong +  // and old stats. +  // Since HIVE-12661, we maintain the old stats (although they may be wrong) for CBO +  // purposes. We use a flag COLUMN_STATS_ACCURATE to +  // show the accuracy of the stats. + +  private boolean clearAggregatorStats = false; + +  private boolean noStatsAggregator = false; + +  private boolean isNoScanAnalyzeCommand = false; + +  private boolean isPartialScanAnalyzeCommand = false; + +  // sourceTask for TS is not changed (currently) but that of FS might be changed +  // by various optimizers (auto.convert.join, for example), +  // so this is set by DriverContext at runtime +  private transient Task sourceTask; + +  private boolean isFollowedByColStats = false; + +  // used by FS based stats collector +  private String statsTmpDir; + +  public BasicStatsWork() { +  } + +  public BasicStatsWork(TableSpec tableSpecs) { +    this.tableSpecs = tableSpecs; +  } + +  public BasicStatsWork(LoadTableDesc loadTableDesc) { +    this.loadTableDesc = loadTableDesc; +  } + +  public BasicStatsWork(LoadFileDesc loadFileDesc) { +    this.loadFileDesc = loadFileDesc; +  } + +  public TableSpec getTableSpecs() { +    return tableSpecs; +  } + +  public LoadTableDesc getLoadTableDesc() { +    return loadTableDesc; +  } + +  public LoadFileDesc getLoadFileDesc() { +    return loadFileDesc; +  } + +  public void setAggKey(String aggK) { +    aggKey = aggK; +  } + +  @Explain(displayName = "Stats Aggregation Key Prefix", explainLevels = { Level.EXTENDED }) +  public String getAggKey() { +    return aggKey; +  } + +  public String getStatsTmpDir() { +    return statsTmpDir; +  } + +  public void setStatsTmpDir(String statsTmpDir) { +    this.statsTmpDir = statsTmpDir; +  } + +  public boolean getNoStatsAggregator() { +    return noStatsAggregator; +  } + +  public void setNoStatsAggregator(boolean noStatsAggregator) { +    this.noStatsAggregator = noStatsAggregator; +  } + +  public boolean isStatsReliable() { +    return statsReliable; +  } + +  public void setStatsReliable(boolean statsReliable) { +    this.statsReliable =
statsReliable; + } + + public boolean isClearAggregatorStats() { + return clearAggregatorStats; + } + + public void setClearAggregatorStats(boolean clearAggregatorStats) { + this.clearAggregatorStats = clearAggregatorStats; + } + + /** + * @return the isNoScanAnalyzeCommand + */ + public boolean isNoScanAnalyzeCommand() { + return isNoScanAnalyzeCommand; + } + + /** + * @param isNoScanAnalyzeCommand the isNoScanAnalyzeCommand to set + */ + public void setNoScanAnalyzeCommand(boolean isNoScanAnalyzeCommand) { + this.isNoScanAnalyzeCommand = isNoScanAnalyzeCommand; + } + + /** + * @return the isPartialScanAnalyzeCommand + */ + public boolean isPartialScanAnalyzeCommand() { + return isPartialScanAnalyzeCommand; + } + + /** + * @param isPartialScanAnalyzeCommand the isPartialScanAnalyzeCommand to set + */ + public void setPartialScanAnalyzeCommand(boolean isPartialScanAnalyzeCommand) { + this.isPartialScanAnalyzeCommand = isPartialScanAnalyzeCommand; + } + + public Task getSourceTask() { + return sourceTask; + } + + public void setSourceTask(Task sourceTask) { + this.sourceTask = sourceTask; + } + + public boolean isFollowedByColStats() { + return isFollowedByColStats; + } + + public void setFollowedByColStats(boolean isFollowedByColStats) { + this.isFollowedByColStats = isFollowedByColStats; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java index 97f323f4b7..a756a29d8b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java @@ -30,6 +30,7 @@ private static final long serialVersionUID = 1L; private boolean isTblLevel; private int numBitVector; + private boolean needMerge; private String tableName; private List colName; private List colType; @@ -44,6 +45,7 @@ public ColumnStatsDesc(String tableName, List colName, List colT this.colType = colType; this.isTblLevel = isTblLevel; this.numBitVector = 0; + this.needMerge = false; } public ColumnStatsDesc(String tableName, List colName, @@ -53,6 +55,7 @@ public ColumnStatsDesc(String tableName, List colName, this.colType = colType; this.isTblLevel = isTblLevel; this.numBitVector = numBitVector; + this.needMerge = this.numBitVector != 0; } @Explain(displayName = "Table") @@ -99,4 +102,8 @@ public void setNumBitVector(int numBitVector) { this.numBitVector = numBitVector; } + public boolean isNeedMerge() { + return needMerge; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java deleted file mode 100644 index 76811b1a93..0000000000 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.plan; - -import java.io.Serializable; - -import org.apache.hadoop.hive.ql.CompilationOpContext; -import org.apache.hadoop.hive.ql.exec.ListSinkOperator; -import org.apache.hadoop.hive.ql.plan.Explain.Level; - - -/** - * ColumnStats Work. - * - */ -@Explain(displayName = "Column Stats Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) -public class ColumnStatsWork implements Serializable { - private static final long serialVersionUID = 1L; - private FetchWork fWork; - private ColumnStatsDesc colStats; - private static final int LIMIT = -1; - - - public ColumnStatsWork() { - } - - public ColumnStatsWork(FetchWork work, ColumnStatsDesc colStats) { - this.fWork = work; - this.setColStats(colStats); - } - - @Override - public String toString() { - String ret; - ret = fWork.toString(); - return ret; - } - - public FetchWork getfWork() { - return fWork; - } - - public void setfWork(FetchWork fWork) { - this.fWork = fWork; - } - - @Explain(displayName = "Column Stats Desc") - public ColumnStatsDesc getColStats() { - return colStats; - } - - public void setColStats(ColumnStatsDesc colStats) { - this.colStats = colStats; - } - - public ListSinkOperator getSink() { - return fWork.getSink(); - } - - public void initializeForFetch(CompilationOpContext ctx) { - fWork.initializeForFetch(ctx); - } - - public int getLeastNumRows() { - return fWork.getLeastNumRows(); - } - - public static int getLimit() { - return LIMIT; - } - -} diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java index a5050c5368..e6120077a7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/StatsWork.java @@ -20,151 +20,99 @@ import java.io.Serializable; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.plan.Explain.Level; - /** - * ConditionalStats. + * Stats Work, may include basic stats work and column stats desc * */ -@Explain(displayName = "Stats-Aggr Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +@Explain(displayName = "Stats Work", explainLevels = { Level.USER, Level.DEFAULT, + Level.EXTENDED }) public class StatsWork implements Serializable { private static final long serialVersionUID = 1L; - - private TableSpec tableSpecs; // source table spec -- for TableScanOperator - private LoadTableDesc loadTableDesc; // same as MoveWork.loadTableDesc -- for FileSinkOperator - private LoadFileDesc loadFileDesc; // same as MoveWork.loadFileDesc -- for FileSinkOperator - private String aggKey; // aggregation key prefix - private boolean statsReliable; // are stats completely reliable - - // If stats aggregator is not present, clear the current aggregator stats. - // For eg. if a merge is being performed, stats already collected by aggregator (numrows etc.) - // are still valid. However, if a load file is being performed, the old stats collected by - // aggregator are not valid. It might be a good idea to clear them instead of leaving wrong - // and old stats. - // Since HIVE-12661, we maintain the old stats (although may be wrong) for CBO - // purpose. 
We use a flag COLUMN_STATS_ACCURATE to -  // show the accuracy of the stats. - -  private boolean clearAggregatorStats = false; - -  private boolean noStatsAggregator = false; - -  private boolean isNoScanAnalyzeCommand = false; - -  private boolean isPartialScanAnalyzeCommand = false; - -  // sourceTask for TS is not changed (currently) but that of FS might be changed -  // by various optimizers (auto.convert.join, for example) -  // so this is set by DriverContext in runtime -  private transient Task sourceTask; - -  // used by FS based stats collector -  private String statsTmpDir; +  // this is for basic stats +  private BasicStatsWork basicStatsWork; +  private BasicStatsNoJobWork basicStatsNoJobWork; +  private FetchWork fWork; +  private ColumnStatsDesc colStats; +  private static final int LIMIT = -1; public StatsWork() { } -  public StatsWork(TableSpec tableSpecs) { -    this.tableSpecs = tableSpecs; -  } - -  public StatsWork(LoadTableDesc loadTableDesc) { -    this.loadTableDesc = loadTableDesc; -  } - -  public StatsWork(LoadFileDesc loadFileDesc) { -    this.loadFileDesc = loadFileDesc; -  } - -  public TableSpec getTableSpecs() { -    return tableSpecs; +  public StatsWork(BasicStatsWork basicStatsWork) { +    this.basicStatsWork = basicStatsWork; } -  public LoadTableDesc getLoadTableDesc() { -    return loadTableDesc; +  public StatsWork(BasicStatsNoJobWork basicStatsNoJobWork) { +    this.basicStatsNoJobWork = basicStatsNoJobWork; } -  public LoadFileDesc getLoadFileDesc() { -    return loadFileDesc; +  public StatsWork(FetchWork work, ColumnStatsDesc colStats) { +    this.fWork = work; +    this.setColStats(colStats); } -  public void setAggKey(String aggK) { -    aggKey = aggK; +  @Override +  public String toString() { +    // fWork is only set when column stats are gathered; guard the basic-stats-only case +    return fWork == null ? "StatsWork" : fWork.toString(); } -  @Explain(displayName = "Stats Aggregation Key Prefix", explainLevels = { Level.EXTENDED }) -  public String getAggKey() { -    return aggKey; +  public FetchWork getfWork() { +    return fWork; } -  public String getStatsTmpDir() { -    return statsTmpDir; +  public void setfWork(FetchWork fWork) { +    this.fWork = fWork; } -  public void setStatsTmpDir(String statsTmpDir) { -    this.statsTmpDir = statsTmpDir; +  @Explain(displayName = "Column Stats Desc") +  public ColumnStatsDesc getColStats() { +    return colStats; } -  public boolean getNoStatsAggregator() { -    return noStatsAggregator; +  public void setColStats(ColumnStatsDesc colStats) { +    this.colStats = colStats; } -  public void setNoStatsAggregator(boolean noStatsAggregator) { -    this.noStatsAggregator = noStatsAggregator; +  public ListSinkOperator getSink() { +    return fWork.getSink(); } -  public boolean isStatsReliable() { -    return statsReliable; +  public void initializeForFetch(CompilationOpContext ctx) { +    fWork.initializeForFetch(ctx); } -  public void setStatsReliable(boolean statsReliable) { -    this.statsReliable = statsReliable; +  public int getLeastNumRows() { +    return fWork.getLeastNumRows(); } -  public boolean isClearAggregatorStats() { -    return clearAggregatorStats; +  public static int getLimit() { +    return LIMIT; } -  public void setClearAggregatorStats(boolean clearAggregatorStats) { -    this.clearAggregatorStats = clearAggregatorStats; +  @Explain(displayName = "Basic Stats Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +  public BasicStatsWork getBasicStatsWork() { +    return basicStatsWork; } -  /** -   * @return the isNoScanAnalyzeCommand -   */ -  public boolean isNoScanAnalyzeCommand() { -    return isNoScanAnalyzeCommand; +  public void setBasicStatsWork(BasicStatsWork basicStatsWork) { +    this.basicStatsWork = basicStatsWork; } -  /** -   * @param isNoScanAnalyzeCommand the isNoScanAnalyzeCommand to set -   */ -  public void setNoScanAnalyzeCommand(boolean isNoScanAnalyzeCommand) { -    this.isNoScanAnalyzeCommand = isNoScanAnalyzeCommand; +  @Explain(displayName = "Basic Stats NoJob Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }) +  public BasicStatsNoJobWork getBasicStatsNoJobWork() { +    return basicStatsNoJobWork; } -  /** -   * @return the isPartialScanAnalyzeCommand -   */ -  public boolean isPartialScanAnalyzeCommand() { -    return isPartialScanAnalyzeCommand; +  public void setBasicStatsNoJobWork(BasicStatsNoJobWork basicStatsNoJobWork) { +    this.basicStatsNoJobWork = basicStatsNoJobWork; } -  /** -   * @param isPartialScanAnalyzeCommand the isPartialScanAnalyzeCommand to set -   */ -  public void setPartialScanAnalyzeCommand(boolean isPartialScanAnalyzeCommand) { -    this.isPartialScanAnalyzeCommand = isPartialScanAnalyzeCommand; -  } - -  public Task getSourceTask() { -    return sourceTask; -  } - -  public void setSourceTask(Task sourceTask) { -    this.sourceTask = sourceTask; -  } }
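For orientation, a minimal sketch (not part of the patch) of how the two flavors of the merged StatsWork above are assembled. It only uses constructors and setters introduced in this diff; the class name, method names, and arguments are illustrative placeholders, and hive-exec is assumed on the classpath:

import java.util.List;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
import org.apache.hadoop.hive.ql.plan.BasicStatsWork;
import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.StatsWork;

public final class StatsWorkSketch {
  // Basic stats only, roughly what the ANALYZE paths above build.
  static Task<?> basicOnly(TableSpec spec, HiveConf conf) {
    BasicStatsWork basic = new BasicStatsWork(spec);
    basic.setStatsReliable(conf.getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE));
    return TaskFactory.get(new StatsWork(basic), conf);
  }

  // Basic plus column stats: the column-stats fetch plan and descriptor are
  // folded into the same StatsWork, as genColumnStatsTask now does.
  static Task<?> withColumnStats(TableSpec spec, FetchWork fetch, String tableName,
      List<String> colNames, List<String> colTypes, HiveConf conf) {
    StatsWork work = new StatsWork(new BasicStatsWork(spec));
    work.setfWork(fetch);
    work.setColStats(new ColumnStatsDesc(tableName, colNames, colTypes, true));
    return TaskFactory.get(work, conf);
  }
}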
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index 4f1c7d8b1e..44ce74d8bb 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -853,4 +853,4 @@ public void testMoreBucketsThanReducers2() throws Exception { int[][] expected = {{0, -1},{0, -1}, {1, -1}, {1, -1}, {2, -1}, {2, -1}, {3, -1}, {3, -1}}; Assert.assertEquals(stringifyValues(expected), r); } -} \ No newline at end of file +} diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java index 21b4a2ce55..ad8d39244b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java +++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java @@ -129,6 +129,7 @@ protected void setUpWithTableProperties(String tableProperties) throws Exception .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); +    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false); TxnDbUtil.setConfValues(hiveConf); TxnDbUtil.prepDb(); diff --git ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java index ad2aac5f56..9062d924c4 100644 --- ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java +++ ql/src/test/org/apache/hadoop/hive/ql/TxnCommandsBaseForTests.java @@ -64,6 +64,7 @@ void setUpInternal() throws Exception {
.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false); TxnDbUtil.setConfValues(hiveConf); TxnDbUtil.prepDb(); File f = new File(getWarehouseDir()); diff --git ql/src/test/queries/clientpositive/autoColumnStats_1.q ql/src/test/queries/clientpositive/autoColumnStats_1.q index 7955b07233..cc32393e65 100644 --- ql/src/test/queries/clientpositive/autoColumnStats_1.q +++ ql/src/test/queries/clientpositive/autoColumnStats_1.q @@ -60,6 +60,8 @@ drop table nzhang_part14; create table if not exists nzhang_part14 (key string) partitioned by (value string); +desc formatted nzhang_part14; + insert overwrite table nzhang_part14 partition(value) select key, value from ( select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a @@ -69,6 +71,8 @@ select key, value from ( select * from (select 'k3' as key, ' ' as value from src limit 2)c ) T; +desc formatted nzhang_part14 partition (value=' '); + explain select key from nzhang_part14; diff --git ql/src/test/queries/clientpositive/autoColumnStats_10.q ql/src/test/queries/clientpositive/autoColumnStats_10.q new file mode 100644 index 0000000000..bf166d8701 --- /dev/null +++ ql/src/test/queries/clientpositive/autoColumnStats_10.q @@ -0,0 +1,52 @@ +set hive.mapred.mode=nonstrict; +set hive.stats.column.autogather=true; + +drop table p; + +CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint); + +desc formatted p; + +insert into p values (1,22,333); + +desc formatted p; + +alter table p replace columns (insert_num int, c1 STRING, c2 STRING); + +desc formatted p; + +desc formatted p insert_num; +desc formatted p c1; + +insert into p values (2,11,111); + +desc formatted p; + +desc formatted p insert_num; +desc formatted p c1; + +set hive.stats.column.autogather=false; + +drop table p; + +CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint); + +desc formatted p; + +insert into p values (1,22,333); + +desc formatted p; + +alter table p replace columns (insert_num int, c1 STRING, c2 STRING); + +desc formatted p; + +desc formatted p insert_num; +desc formatted p c1; + +insert into p values (2,11,111); + +desc formatted p; + +desc formatted p insert_num; +desc formatted p c1; diff --git ql/src/test/queries/clientpositive/autoColumnStats_3a.q ql/src/test/queries/clientpositive/autoColumnStats_3a.q new file mode 100644 index 0000000000..516d6b89b3 --- /dev/null +++ ql/src/test/queries/clientpositive/autoColumnStats_3a.q @@ -0,0 +1,34 @@ +set hive.stats.column.autogather=false; +set hive.stats.fetch.column.stats=true; +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.auto.convert.join=true; +set hive.join.emit.interval=2; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; +set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ; +set hive.optimize.bucketingsorting=false; + +set hive.stats.column.autogather=true; + +create table if not exists nzhang_part14 (key string, value string) + partitioned by (ds string, hr string); + + +desc formatted nzhang_part14; +analyze table nzhang_part14 partition (ds,hr) compute statistics for columns; +desc formatted nzhang_part14; + +insert into table nzhang_part14 
partition(ds, hr) +select key, value, ds, hr from ( + select '1' as ds,'3' as hr,'k' as key, 'v' as value + union all + select '2' as ds,'1' as hr,'k' as key, 'v' as value +) T; + +desc formatted nzhang_part14 partition(ds='1', hr='3'); + +desc formatted nzhang_part14 partition(ds='2', hr='1'); + +desc formatted nzhang_part14; + diff --git ql/src/test/queries/clientpositive/bucket_map_join_tez2.q ql/src/test/queries/clientpositive/bucket_map_join_tez2.q index 37989ecc9d..c600f639da 100644 --- ql/src/test/queries/clientpositive/bucket_map_join_tez2.q +++ ql/src/test/queries/clientpositive/bucket_map_join_tez2.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.strict.checks.bucketing=false; set hive.mapred.mode=nonstrict; diff --git ql/src/test/queries/clientpositive/bucket_num_reducers.q ql/src/test/queries/clientpositive/bucket_num_reducers.q index 06f334e833..5c5008eea7 100644 --- ql/src/test/queries/clientpositive/bucket_num_reducers.q +++ ql/src/test/queries/clientpositive/bucket_num_reducers.q @@ -1,4 +1,4 @@ -; +set hive.stats.column.autogather=false; set hive.exec.mode.local.auto=false; set mapred.reduce.tasks = 10; diff --git ql/src/test/queries/clientpositive/combine1.q ql/src/test/queries/clientpositive/combine1.q index 3bcb8b19c1..b300830884 100644 --- ql/src/test/queries/clientpositive/combine1.q +++ ql/src/test/queries/clientpositive/combine1.q @@ -7,6 +7,8 @@ set mapred.max.split.size=256; set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; +set hive.stats.column.autogather=false; + -- SORT_QUERY_RESULTS create table combine1_1(key string, value string) stored as textfile; diff --git ql/src/test/queries/clientpositive/correlationoptimizer5.q ql/src/test/queries/clientpositive/correlationoptimizer5.q index 45b8cb955d..002fb12e22 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer5.q +++ ql/src/test/queries/clientpositive/correlationoptimizer5.q @@ -1,3 +1,5 @@ +set hive.stats.column.autogather=false; +-- Currently, a query with multiple FileSinkOperators is not supported.
set hive.mapred.mode=nonstrict; CREATE TABLE T1(key INT, val STRING); LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1; diff --git ql/src/test/queries/clientpositive/encryption_insert_values.q ql/src/test/queries/clientpositive/encryption_insert_values.q index 2dd3e9ad1d..c8d1d519f3 100644 --- ql/src/test/queries/clientpositive/encryption_insert_values.q +++ ql/src/test/queries/clientpositive/encryption_insert_values.q @@ -1,4 +1,5 @@ -- SORT_QUERY_RESULTS; +set hive.stats.column.autogather=false; DROP TABLE IF EXISTS encrypted_table PURGE; CREATE TABLE encrypted_table (key INT, value STRING) LOCATION '${hiveconf:hive.metastore.warehouse.dir}/default/encrypted_table'; @@ -12,4 +13,4 @@ select * from encrypted_table; -- this checks that we've actually created temp table data under encrypted_table folder describe formatted values__tmp__table__1; -CRYPTO DELETE_KEY --keyName key_128; \ No newline at end of file +CRYPTO DELETE_KEY --keyName key_128; diff --git ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q index 4dcea1f7ce..7159ad5995 100644 --- ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q +++ ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; --SORT_QUERY_RESULTS -- Java JCE must be installed in order to hava a key length of 256 bits diff --git ql/src/test/queries/clientpositive/encryption_move_tbl.q ql/src/test/queries/clientpositive/encryption_move_tbl.q index 0b7771cc4a..8d865aa6e8 100644 --- ql/src/test/queries/clientpositive/encryption_move_tbl.q +++ ql/src/test/queries/clientpositive/encryption_move_tbl.q @@ -1,4 +1,5 @@ -- SORT_QUERY_RESULTS; +set hive.stats.column.autogather=false; -- we're setting this so that TestNegaiveCliDriver.vm doesn't stop processing after ALTER TABLE fails; diff --git ql/src/test/queries/clientpositive/exec_parallel_column_stats.q ql/src/test/queries/clientpositive/exec_parallel_column_stats.q index ceacc2442d..a89b707918 100644 --- ql/src/test/queries/clientpositive/exec_parallel_column_stats.q +++ ql/src/test/queries/clientpositive/exec_parallel_column_stats.q @@ -1,5 +1,7 @@ set hive.exec.parallel=true; -explain analyze table src compute statistics for columns; +create table t as select * from src; -analyze table src compute statistics for columns; \ No newline at end of file +explain analyze table t compute statistics for columns; + +analyze table t compute statistics for columns; diff --git ql/src/test/queries/clientpositive/groupby1.q ql/src/test/queries/clientpositive/groupby1.q index a8c9a8dcf8..cd3a12b44e 100755 --- ql/src/test/queries/clientpositive/groupby1.q +++ ql/src/test/queries/clientpositive/groupby1.q @@ -1,3 +1,5 @@ +-- due to testMTQueries1 +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.map.aggr=false; diff --git ql/src/test/queries/clientpositive/groupby1_limit.q ql/src/test/queries/clientpositive/groupby1_limit.q index b8e389e511..6c40e19540 100644 --- ql/src/test/queries/clientpositive/groupby1_limit.q +++ ql/src/test/queries/clientpositive/groupby1_limit.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set mapred.reduce.tasks=31; diff --git ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q 
index 2b799f87eb..40976ee707 100644 --- ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q +++ ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q @@ -1,3 +1,6 @@ +set hive.stats.column.autogather=false; +-- due to L137 in LimitPushDownOptimization: not safe to continue for RS-GBY-GBY-LIM kind of pipelines; see HIVE-10607 for more. + set hive.multigroupby.singlereducer=true; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q index 9c19a8678c..ec34e7a555 100644 --- ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q +++ ql/src/test/queries/clientpositive/hybridgrace_hashjoin_1.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; -- Hybrid Grace Hash Join diff --git ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q index 6809b721be..e4170283f3 100644 --- ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q +++ ql/src/test/queries/clientpositive/infer_bucket_sort_convert_join.q @@ -1,3 +1,12 @@ +set hive.stats.column.autogather=false; +-- sounds weird: +-- on master, when auto=true, hive.mapjoin.localtask.max.memory.usage will be 0.55 as there is a gby +-- (L132 of LocalMapJoinProcFactory) +-- when executed in the CLI, hive.exec.submit.local.task.via.child is true and we can see the error; +-- if we set hive.exec.submit.local.task.via.child=false, we can see it. +-- with the patch, we just merge the tasks; hive.exec.submit.local.task.via.child=false due to the pom.xml setting. +-- however, even after changing it to true, it still fails. + set hive.mapred.mode=nonstrict; set hive.exec.infer.bucket.sort=true; set hive.exec.infer.bucket.sort.num.buckets.power.two=true; diff --git ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q index 6824c1c032..c0ddb8bce6 100644 --- ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q +++ ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.exec.infer.bucket.sort=true; set hive.exec.infer.bucket.sort.num.buckets.power.two=true; diff --git ql/src/test/queries/clientpositive/input11_limit.q ql/src/test/queries/clientpositive/input11_limit.q index 052a72ee68..211c37adc5 100644 --- ql/src/test/queries/clientpositive/input11_limit.q +++ ql/src/test/queries/clientpositive/input11_limit.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS diff --git ql/src/test/queries/clientpositive/input14_limit.q ql/src/test/queries/clientpositive/input14_limit.q index 7316752a6d..2f6e4e47c9 100644 --- ql/src/test/queries/clientpositive/input14_limit.q +++ ql/src/test/queries/clientpositive/input14_limit.q @@ -1,3 +1,5 @@ +set hive.stats.column.autogather=false; + CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; EXPLAIN diff --git ql/src/test/queries/clientpositive/join2.q ql/src/test/queries/clientpositive/join2.q index 8aedd561e2..c3c7c241e9 100644 --- ql/src/test/queries/clientpositive/join2.q +++ ql/src/test/queries/clientpositive/join2.q @@ -1,3 +1,5 @@ +-- due to testMTQueries1 +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS diff --git
ql/src/test/queries/clientpositive/metadata_only_queries.q ql/src/test/queries/clientpositive/metadata_only_queries.q index 8581a46b2d..bcf320b0c5 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries.q +++ ql/src/test/queries/clientpositive/metadata_only_queries.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.compute.query.using.stats=true; diff --git ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q index 1af813e3ed..692c414354 100644 --- ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q +++ ql/src/test/queries/clientpositive/metadata_only_queries_with_filters.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.stats.dbclass=fs; set hive.compute.query.using.stats=true; set hive.explain.user=false; diff --git ql/src/test/queries/clientpositive/multiMapJoin1.q ql/src/test/queries/clientpositive/multiMapJoin1.q index 5c49b4c64f..6e16af4617 100644 --- ql/src/test/queries/clientpositive/multiMapJoin1.q +++ ql/src/test/queries/clientpositive/multiMapJoin1.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.apache.hadoop.hive.ql.hooks.PrintCompletedTasksHook; diff --git ql/src/test/queries/clientpositive/orc_wide_table.q ql/src/test/queries/clientpositive/orc_wide_table.q index 422a3c24b1..d2ec3857d0 100644 --- ql/src/test/queries/clientpositive/orc_wide_table.q +++ ql/src/test/queries/clientpositive/orc_wide_table.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.stats.column.autogather=false; drop table if exists test_txt; drop table if exists test_orc; create table test_txt( diff --git ql/src/test/queries/clientpositive/partition_coltype_literals.q ql/src/test/queries/clientpositive/partition_coltype_literals.q index eb56b1a93d..8da4876b70 100644 --- ql/src/test/queries/clientpositive/partition_coltype_literals.q +++ ql/src/test/queries/clientpositive/partition_coltype_literals.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.compute.query.using.stats=false; drop table if exists partcoltypenum; create table partcoltypenum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint); diff --git ql/src/test/queries/clientpositive/row__id.q ql/src/test/queries/clientpositive/row__id.q index d9cb7b0ff6..6aaa40f68f 100644 --- ql/src/test/queries/clientpositive/row__id.q +++ ql/src/test/queries/clientpositive/row__id.q @@ -1,3 +1,5 @@ +-- tid is flaky when computing column stats +set hive.stats.column.autogather=false; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; drop table if exists hello_acid; diff --git ql/src/test/queries/clientpositive/smb_join_partition_key.q ql/src/test/queries/clientpositive/smb_join_partition_key.q index 160bf5e36a..23027f8aa5 100644 --- ql/src/test/queries/clientpositive/smb_join_partition_key.q +++ ql/src/test/queries/clientpositive/smb_join_partition_key.q @@ -1,3 +1,5 @@ +-- Because p1 is decimal, when Derby retrieves the partition with a decimal value it uses partval = 100.0 rather than 100; as a result, the partition is not found and an exception is thrown.
+set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; SET hive.enforce.sortmergebucketmapjoin=false; SET hive.auto.convert.sortmerge.join=true; diff --git ql/src/test/queries/clientpositive/stats15a.q ql/src/test/queries/clientpositive/stats15a.q new file mode 100644 index 0000000000..194e02c4f3 --- /dev/null +++ ql/src/test/queries/clientpositive/stats15a.q @@ -0,0 +1,25 @@ +set datanucleus.cache.collections=false; + +create table my_src (key string, value string); +insert into my_src values ('1','2'),('3','4'); + +create table my_srcpart (key string, value string) partitioned by (ds string,hr string); + +create table stats_part like my_srcpart; + +insert overwrite table stats_part partition (ds='2010-04-08', hr = '11') select key, value from my_src; +insert overwrite table stats_part partition (ds='2010-04-08', hr = '12') select key, value from my_src; + +analyze table stats_part partition(ds='2010-04-08', hr='11') compute statistics; +analyze table stats_part partition(ds='2010-04-08', hr='12') compute statistics; + +insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from my_src; + +desc formatted stats_part; +desc formatted stats_part partition (ds='2010-04-08', hr = '11'); +desc formatted stats_part partition (ds='2010-04-08', hr = '12'); + +analyze table stats_part partition(ds, hr) compute statistics; +desc formatted stats_part; + +drop table stats_part; diff --git ql/src/test/queries/clientpositive/stats_noscan_2a.q ql/src/test/queries/clientpositive/stats_noscan_2a.q new file mode 100644 index 0000000000..638f33d27a --- /dev/null +++ ql/src/test/queries/clientpositive/stats_noscan_2a.q @@ -0,0 +1,30 @@ +set hive.stats.autogather=true; + +dfs -cp ${system:hive.root}/data/files/ext_test ${system:test.tmp.dir}/analyze_external; +dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/texternal/2008-01-01; + +create external table t0 (key string,val string) location 'pfile://${system:test.tmp.dir}/texternal/2008-01-01'; +explain insert into t0 values ('a','b'),('a','b'), ('a','b'); +insert into t0 values ('a','b'),('a','b'), ('a','b'); +drop table t0; + +-- create external table +CREATE external TABLE anaylyze_external (key string, val string) partitioned by (insertdate string) LOCATION "pfile://${system:test.tmp.dir}/texternal"; +describe formatted anaylyze_external; +explain ALTER TABLE anaylyze_external ADD PARTITION (insertdate='2008-01-01') location 'pfile://${system:test.tmp.dir}/texternal/2008-01-01'; +ALTER TABLE anaylyze_external ADD PARTITION (insertdate='2008-01-01') location 'pfile://${system:test.tmp.dir}/texternal/2008-01-01'; +describe formatted anaylyze_external PARTITION (insertdate='2008-01-01'); +explain select count(*) from anaylyze_external where insertdate='2008-01-01'; +select count(*) from anaylyze_external where insertdate='2008-01-01'; +select count(*) from anaylyze_external; + +-- analyze +analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics noscan; +describe formatted anaylyze_external PARTITION (insertdate='2008-01-01'); +analyze table anaylyze_external PARTITION (insertdate='2008-01-01') compute statistics; +describe formatted anaylyze_external PARTITION (insertdate='2008-01-01'); +dfs -rmr ${system:test.tmp.dir}/texternal; +drop table anaylyze_external; + + + diff --git ql/src/test/queries/clientpositive/udf_round_2.q ql/src/test/queries/clientpositive/udf_round_2.q index 43988c1225..38885a97d4 100644 --- ql/src/test/queries/clientpositive/udf_round_2.q +++ 
ql/src/test/queries/clientpositive/udf_round_2.q @@ -1,4 +1,5 @@ set hive.fetch.task.conversion=more; +set hive.stats.column.autogather=false; -- test for NaN (not-a-number) create table tstTbl1(n double); diff --git ql/src/test/queries/clientpositive/vector_groupby_rollup1.q ql/src/test/queries/clientpositive/vector_groupby_rollup1.q index e08f8b9393..c2441c6864 100644 --- ql/src/test/queries/clientpositive/vector_groupby_rollup1.q +++ ql/src/test/queries/clientpositive/vector_groupby_rollup1.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; SET hive.vectorized.execution.reduce.enabled=true; diff --git ql/src/test/queries/clientpositive/vector_multi_insert.q ql/src/test/queries/clientpositive/vector_multi_insert.q index c56ee1c4aa..e6bfb96794 100644 --- ql/src/test/queries/clientpositive/vector_multi_insert.q +++ ql/src/test/queries/clientpositive/vector_multi_insert.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vector_udf_character_length.q ql/src/test/queries/clientpositive/vector_udf_character_length.q index 19a5260ddc..e49a091b34 100644 --- ql/src/test/queries/clientpositive/vector_udf_character_length.q +++ ql/src/test/queries/clientpositive/vector_udf_character_length.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vector_udf_octet_length.q ql/src/test/queries/clientpositive/vector_udf_octet_length.q index 06a49852a2..af4c7c4a7f 100644 --- ql/src/test/queries/clientpositive/vector_udf_octet_length.q +++ ql/src/test/queries/clientpositive/vector_udf_octet_length.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/queries/clientpositive/vector_varchar_4.q ql/src/test/queries/clientpositive/vector_varchar_4.q index 80f84d8b9f..b3402d0df2 100644 --- ql/src/test/queries/clientpositive/vector_varchar_4.q +++ ql/src/test/queries/clientpositive/vector_varchar_4.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; @@ -50,4 +51,4 @@ insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s -- insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; --- select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc; \ No newline at end of file +-- select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc; diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q index 6f753a748d..352ec3aebc 100644 --- ql/src/test/queries/clientpositive/vector_varchar_simple.q +++ ql/src/test/queries/clientpositive/vector_varchar_simple.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; diff --git ql/src/test/results/clientnegative/fileformat_void_input.q.out ql/src/test/results/clientnegative/fileformat_void_input.q.out index 6043258506..59242ea2f4 100644 --- 
ql/src/test/results/clientnegative/fileformat_void_input.q.out +++ ql/src/test/results/clientnegative/fileformat_void_input.q.out @@ -10,16 +10,4 @@ POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -FAILED: SemanticException 3:20 Input format must implement InputFormat. Error encountered near token 'dest1' +FAILED: SemanticException 1:72 Input format must implement InputFormat. Error encountered near token 'dest1' diff --git ql/src/test/results/clientpositive/acid_table_stats.q.out ql/src/test/results/clientpositive/acid_table_stats.q.out index 6ab6b4346d..eafdb64979 100644 --- ql/src/test/results/clientpositive/acid_table_stats.q.out +++ ql/src/test/results/clientpositive/acid_table_stats.q.out @@ -232,11 +232,15 @@ PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Output: default@acid +PREHOOK: Output: default@acid@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Output: default@acid +POSTHOOK: Output: default@acid@ds=2008-04-08 #### A masked pattern was here #### PREHOOK: query: desc formatted acid partition(ds='2008-04-08') PREHOOK: type: DESCTABLE @@ -514,11 +518,15 @@ PREHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@acid PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Output: default@acid +PREHOOK: Output: default@acid@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: analyze table acid partition(ds='2008-04-08') compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@acid POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Output: default@acid +POSTHOOK: Output: default@acid@ds=2008-04-08 #### A masked pattern was here #### PREHOOK: query: explain select max(key) from acid where ds='2008-04-08' PREHOOK: type: QUERY @@ -668,7 +676,7 @@ Database: default Table: acid #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1000 rawDataSize 176000 diff --git ql/src/test/results/clientpositive/alterColumnStats.q.out ql/src/test/results/clientpositive/alterColumnStats.q.out index ea2416f6fb..f8c19b0274 100644 --- ql/src/test/results/clientpositive/alterColumnStats.q.out +++ ql/src/test/results/clientpositive/alterColumnStats.q.out @@ -74,7 +74,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}} numFiles 1 numRows 1 rawDataSize 8 @@ -118,7 +118,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} #### A masked pattern was here #### numFiles 1 numRows 1 @@ -145,7 +145,7 @@ POSTHOOK: Input: default@p # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector c1 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} PREHOOK: query: desc formatted p c2 PREHOOK: type: DESCTABLE PREHOOK: Input: default@p @@ -155,4 +155,4 @@ POSTHOOK: Input: default@p # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector c2 string from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}} diff --git ql/src/test/results/clientpositive/alterColumnStatsPart.q.out ql/src/test/results/clientpositive/alterColumnStatsPart.q.out index a315a6be39..dc01c2b3cf 100644 --- ql/src/test/results/clientpositive/alterColumnStatsPart.q.out +++ ql/src/test/results/clientpositive/alterColumnStatsPart.q.out @@ -65,9 +65,9 @@ PREHOOK: Input: default@p POSTHOOK: query: desc formatted p partition (c=1) a POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@p -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -a int from deserializer +a int 1 1 0 1 HL from deserializer PREHOOK: query: desc formatted p partition (c=1) PREHOOK: type: DESCTABLE PREHOOK: Input: default@p @@ -90,7 +90,7 @@ Database: default Table: p #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} numFiles 1 numRows 1 rawDataSize 3 @@ -112,46 +112,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(a) from p where c=1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: p - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: a - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(a) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -159,11 +125,15 @@ PREHOOK: query: analyze table p partition(c=1) compute statistics for columns a PREHOOK: type: QUERY PREHOOK: Input: default@p PREHOOK: Input: default@p@c=1 +PREHOOK: Output: default@p +PREHOOK: Output: default@p@c=1 #### A masked pattern was here #### POSTHOOK: query: analyze table p partition(c=1) compute statistics for columns a POSTHOOK: type: QUERY POSTHOOK: Input: default@p POSTHOOK: Input: default@p@c=1 +POSTHOOK: Output: default@p +POSTHOOK: Output: default@p@c=1 #### A masked pattern was here #### PREHOOK: query: explain select max(a) from p where c=1 PREHOOK: type: QUERY @@ -217,7 +187,7 @@ Database: default Table: p #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} numFiles 1 numRows 1 rawDataSize 3 @@ -239,46 +209,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(a) from p where c=4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: p - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: a - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(a) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -380,11 +316,15 @@ PREHOOK: query: analyze table p partition(c=100) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@p PREHOOK: Input: default@p@c=100 +PREHOOK: Output: default@p +PREHOOK: Output: default@p@c=100 #### A masked pattern was here #### POSTHOOK: query: analyze table p partition(c=100) compute statistics for columns a POSTHOOK: type: QUERY POSTHOOK: Input: default@p POSTHOOK: Input: default@p@c=100 +POSTHOOK: Output: default@p +POSTHOOK: Output: default@p@c=100 #### A masked pattern was here #### PREHOOK: query: explain select max(a) from p where c=100 PREHOOK: type: QUERY @@ -485,11 +425,15 @@ PREHOOK: query: analyze table p partition(c=100) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@p PREHOOK: Input: default@p@c=100 +PREHOOK: Output: default@p +PREHOOK: Output: default@p@c=100 #### A masked pattern was here #### 
POSTHOOK: query: analyze table p partition(c=100) compute statistics for columns a POSTHOOK: type: QUERY POSTHOOK: Input: default@p POSTHOOK: Input: default@p@c=100 +POSTHOOK: Output: default@p +POSTHOOK: Output: default@p@c=100 #### A masked pattern was here #### PREHOOK: query: explain select max(a) from p where c=100 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out index 12dcc11794..fc61bc6f6d 100644 --- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out +++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out @@ -79,7 +79,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -178,7 +178,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -277,7 +277,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -376,7 +376,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -475,7 +475,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -574,7 +574,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -673,7 +673,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -772,7 +772,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -871,7 +871,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git 
ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out index 944482c6d5..5b9ba69e9c 100644 --- ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out +++ ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out @@ -88,7 +88,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -145,7 +145,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 12 numRows 500 rawDataSize 5312 @@ -275,7 +275,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 12 numRows 500 rawDataSize 5312 @@ -364,7 +364,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 12 numRows 500 diff --git ql/src/test/results/clientpositive/alter_partition_coltype.q.out ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 426a4de206..61731254dc 100644 --- ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -232,7 +232,7 @@ STAGE PLANS: dt 100 ts 3.0 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -278,7 +278,7 @@ STAGE PLANS: dt 100 ts 6.30 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -469,7 +469,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -538,7 +538,7 @@ STAGE PLANS: partcol1 2 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol diff --git ql/src/test/results/clientpositive/alter_partition_update_status.q.out ql/src/test/results/clientpositive/alter_partition_update_status.q.out index caa1f8dd32..15aab1e64c 100644 --- ql/src/test/results/clientpositive/alter_partition_update_status.q.out +++ ql/src/test/results/clientpositive/alter_partition_update_status.q.out @@ -22,11 +22,15 @@ PREHOOK: query: ANALYZE TABLE src_stat_part_one PARTITION(partitionId=1) COMPUTE PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part_one PREHOOK: Input: default@src_stat_part_one@partitionid=1 +PREHOOK: Output: default@src_stat_part_one +PREHOOK: Output: 
default@src_stat_part_one@partitionid=1 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_part_one PARTITION(partitionId=1) COMPUTE STATISTICS for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_part_one POSTHOOK: Input: default@src_stat_part_one@partitionid=1 +POSTHOOK: Output: default@src_stat_part_one +POSTHOOK: Output: default@src_stat_part_one@partitionid=1 #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_part_one PARTITION(partitionId=1) key PREHOOK: type: DESCTABLE @@ -74,11 +78,15 @@ PREHOOK: query: ANALYZE TABLE src_stat_part_two PARTITION(px=1) COMPUTE STATISTI PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part_two PREHOOK: Input: default@src_stat_part_two@px=1/py=a +PREHOOK: Output: default@src_stat_part_two +PREHOOK: Output: default@src_stat_part_two@px=1/py=a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_part_two PARTITION(px=1) COMPUTE STATISTICS for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_part_two POSTHOOK: Input: default@src_stat_part_two@px=1/py=a +POSTHOOK: Output: default@src_stat_part_two +POSTHOOK: Output: default@src_stat_part_two@px=1/py=a #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_part_two PARTITION(px=1, py='a') key PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/alter_table_add_partition.q.out ql/src/test/results/clientpositive/alter_table_add_partition.q.out index c96e5b43b7..3a3b38d92a 100644 --- ql/src/test/results/clientpositive/alter_table_add_partition.q.out +++ ql/src/test/results/clientpositive/alter_table_add_partition.q.out @@ -199,7 +199,7 @@ Database: default Table: mp #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}} numFiles 1 numRows 1 rawDataSize 1 diff --git ql/src/test/results/clientpositive/alter_table_column_stats.q.out ql/src/test/results/clientpositive/alter_table_column_stats.q.out index b93e9de1c1..f87d9e52df 100644 --- ql/src/test/results/clientpositive/alter_table_column_stats.q.out +++ ql/src/test/results/clientpositive/alter_table_column_stats.q.out @@ -77,10 +77,12 @@ PREHOOK: query: analyze table testtable0 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable0 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable0 POSTHOOK: query: analyze table testtable0 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable0 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable0 PREHOOK: query: describe formatted statsdb1.testtable0 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 @@ -455,12 +457,18 @@ PREHOOK: Input: statsdb1@testpart0 PREHOOK: Input: statsdb1@testpart0@part=part1 PREHOOK: Input: statsdb1@testpart0@part=part2 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testpart0 +PREHOOK: Output: statsdb1@testpart0@part=part1 +PREHOOK: Output: statsdb1@testpart0@part=part2 POSTHOOK: query: analyze table testpart0 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testpart0 POSTHOOK: Input: statsdb1@testpart0@part=part1 POSTHOOK: Input: statsdb1@testpart0@part=part2 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testpart0 +POSTHOOK: Output: statsdb1@testpart0@part=part1 +POSTHOOK: Output: statsdb1@testpart0@part=part2 PREHOOK: query: 
describe formatted statsdb1.testpart0 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 @@ -1617,10 +1625,12 @@ PREHOOK: query: analyze table testtable0 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable0 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable0 POSTHOOK: query: analyze table testtable0 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable0 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable0 PREHOOK: query: describe formatted statsdb1.testtable0 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable0 @@ -1995,12 +2005,18 @@ PREHOOK: Input: statsdb1@testpart0 PREHOOK: Input: statsdb1@testpart0@part=part1 PREHOOK: Input: statsdb1@testpart0@part=part2 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testpart0 +PREHOOK: Output: statsdb1@testpart0@part=part1 +PREHOOK: Output: statsdb1@testpart0@part=part2 POSTHOOK: query: analyze table testpart0 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testpart0 POSTHOOK: Input: statsdb1@testpart0@part=part1 POSTHOOK: Input: statsdb1@testpart0@part=part2 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testpart0 +POSTHOOK: Output: statsdb1@testpart0@part=part1 +POSTHOOK: Output: statsdb1@testpart0@part=part2 PREHOOK: query: describe formatted statsdb1.testpart0 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testpart0 diff --git ql/src/test/results/clientpositive/alter_table_serde2.q.out ql/src/test/results/clientpositive/alter_table_serde2.q.out index ae0ef54755..324859d664 100644 --- ql/src/test/results/clientpositive/alter_table_serde2.q.out +++ ql/src/test/results/clientpositive/alter_table_serde2.q.out @@ -79,7 +79,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -179,7 +179,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/alter_table_update_status.q.out ql/src/test/results/clientpositive/alter_table_update_status.q.out index 452f37bba3..9e9164f144 100644 --- ql/src/test/results/clientpositive/alter_table_update_status.q.out +++ ql/src/test/results/clientpositive/alter_table_update_status.q.out @@ -35,10 +35,12 @@ POSTHOOK: Output: default@src_stat_int PREHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat +PREHOOK: Output: default@src_stat #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat +POSTHOOK: Output: default@src_stat #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat key PREHOOK: type: DESCTABLE @@ -81,10 +83,12 @@ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_int +PREHOOK: Output: default@src_stat_int #### A masked pattern was here #### 
POSTHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_int +POSTHOOK: Output: default@src_stat_int #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_int key PREHOOK: type: DESCTABLE @@ -95,7 +99,7 @@ POSTHOOK: Input: default@src_stat_int # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector key double 66.0 406.0 10 15 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') @@ -109,7 +113,7 @@ POSTHOOK: Input: default@src_stat_int # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector key double 333.22 22.22 10 2222 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:dummydb diff --git ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out index 1dcc1fcf4e..ef9860ef99 100644 --- ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out +++ ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out @@ -35,10 +35,12 @@ POSTHOOK: Output: default@src_stat_int PREHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat +PREHOOK: Output: default@src_stat #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat +POSTHOOK: Output: default@src_stat #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat key PREHOOK: type: DESCTABLE @@ -81,10 +83,12 @@ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true PREHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_int +PREHOOK: Output: default@src_stat_int #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_int COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_int +POSTHOOK: Output: default@src_stat_int #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_int key PREHOOK: type: DESCTABLE @@ -95,7 +99,7 @@ POSTHOOK: Input: default@src_stat_int # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector key double 66.0 406.0 10 15 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS 
POSTHOOK: query: ALTER TABLE src_stat_int UPDATE STATISTICS for column key SET ('numDVs'='2222','lowValue'='333.22','highValue'='22.22') @@ -109,7 +113,7 @@ POSTHOOK: Input: default@src_stat_int # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector key double 333.22 22.22 10 2222 from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:dummydb diff --git ql/src/test/results/clientpositive/analyze_table_null_partition.q.out ql/src/test/results/clientpositive/analyze_table_null_partition.q.out index 2bfc04a142..3a47edb9cb 100644 --- ql/src/test/results/clientpositive/analyze_table_null_partition.q.out +++ ql/src/test/results/clientpositive/analyze_table_null_partition.q.out @@ -102,7 +102,7 @@ STAGE PLANS: partition values: age 15 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -147,7 +147,7 @@ STAGE PLANS: partition values: age 30 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -192,7 +192,7 @@ STAGE PLANS: partition values: age 40 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -237,7 +237,7 @@ STAGE PLANS: partition values: age __HIVE_DEFAULT_PARTITION__ properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name diff --git ql/src/test/results/clientpositive/analyze_tbl_date.q.out ql/src/test/results/clientpositive/analyze_tbl_date.q.out index a0cdbca8c1..eac13cc1bc 100644 --- ql/src/test/results/clientpositive/analyze_tbl_date.q.out +++ ql/src/test/results/clientpositive/analyze_tbl_date.q.out @@ -16,10 +16,12 @@ POSTHOOK: Lineage: test_table.d EXPRESSION [(values__tmp__table__1)values__tmp__ PREHOOK: query: analyze table test_table compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@test_table +PREHOOK: Output: default@test_table #### A masked pattern was here #### POSTHOOK: query: analyze table test_table compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table +POSTHOOK: Output: default@test_table #### A masked pattern was here #### PREHOOK: query: describe formatted test_table PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/analyze_tbl_part.q.out ql/src/test/results/clientpositive/analyze_tbl_part.q.out index a7a2220304..ec18625127 100644 --- ql/src/test/results/clientpositive/analyze_tbl_part.q.out +++ ql/src/test/results/clientpositive/analyze_tbl_part.q.out @@ -35,12 +35,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part PREHOOK: Input: default@src_stat_part@partitionid=1 PREHOOK: Input: default@src_stat_part@partitionid=2 +PREHOOK: Output: default@src_stat_part +PREHOOK: Output: default@src_stat_part@partitionid=1 +PREHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_part partition 
(partitionId) COMPUTE STATISTICS for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_part POSTHOOK: Input: default@src_stat_part@partitionid=1 POSTHOOK: Input: default@src_stat_part@partitionid=2 +POSTHOOK: Output: default@src_stat_part +POSTHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key PREHOOK: type: DESCTABLE @@ -56,12 +62,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_part PREHOOK: Input: default@src_stat_part@partitionid=1 PREHOOK: Input: default@src_stat_part@partitionid=2 +PREHOOK: Output: default@src_stat_part +PREHOOK: Output: default@src_stat_part@partitionid=1 +PREHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for columns key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_part POSTHOOK: Input: default@src_stat_part@partitionid=1 POSTHOOK: Input: default@src_stat_part@partitionid=2 +POSTHOOK: Output: default@src_stat_part +POSTHOOK: Output: default@src_stat_part@partitionid=1 +POSTHOOK: Output: default@src_stat_part@partitionid=2 #### A masked pattern was here #### PREHOOK: query: describe formatted src_stat_part PARTITION(partitionId=1) key PREHOOK: type: DESCTABLE @@ -117,19 +129,27 @@ PREHOOK: query: ANALYZE TABLE src_stat_string_part partition (partitionName="p\' PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_string_part PREHOOK: Input: default@src_stat_string_part@partitionname=p%271 +PREHOOK: Output: default@src_stat_string_part +PREHOOK: Output: default@src_stat_string_part@partitionname=p%271 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_string_part partition (partitionName="p\'1") COMPUTE STATISTICS for columns key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_string_part POSTHOOK: Input: default@src_stat_string_part@partitionname=p%271 +POSTHOOK: Output: default@src_stat_string_part +POSTHOOK: Output: default@src_stat_string_part@partitionname=p%271 #### A masked pattern was here #### PREHOOK: query: ANALYZE TABLE src_stat_string_part partition (partitionName="p\"1") COMPUTE STATISTICS for columns key, value PREHOOK: type: QUERY PREHOOK: Input: default@src_stat_string_part PREHOOK: Input: default@src_stat_string_part@partitionname=p%221 +PREHOOK: Output: default@src_stat_string_part +PREHOOK: Output: default@src_stat_string_part@partitionname=p%221 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src_stat_string_part partition (partitionName="p\"1") COMPUTE STATISTICS for columns key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@src_stat_string_part POSTHOOK: Input: default@src_stat_string_part@partitionname=p%221 +POSTHOOK: Output: default@src_stat_string_part +POSTHOOK: Output: default@src_stat_string_part@partitionname=p%221 #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out index 6e2975e671..d81f5660ea 100644 --- ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out +++ ql/src/test/results/clientpositive/annotate_stats_deep_filters.q.out @@ -59,10 +59,12 @@ POSTHOOK: Output: default@over1k PREHOOK: query: analyze table over1k compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: 
default@over1k +PREHOOK: Output: default@over1k #### A masked pattern was here #### POSTHOOK: query: analyze table over1k compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@over1k +POSTHOOK: Output: default@over1k #### A masked pattern was here #### PREHOOK: query: explain select count(*) from over1k where ( (t=1 and si=2) diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index b2f983606e..99b907cb10 100644 --- ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from loc_orc where state='OH' @@ -87,17 +87,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (state = 'OH') (type: boolean) - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -112,10 +112,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select * from loc_orc where state='OH' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/annotate_stats_groupby.q.out ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index f9a1eb8312..ed3d5942eb 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -66,20 +66,22 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc compute statistics for columns state PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select a, c, min(b) from ( select state as a, locid as b, count(*) as c @@ -106,22 +108,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,13 +131,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string), _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -151,7 +153,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +161,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 
6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -177,10 +179,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select year from loc_orc group by year PREHOOK: type: QUERY @@ -743,30 +747,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out index 31c4ed147f..d95af92acd 100644 --- ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out +++ ql/src/test/results/clientpositive/annotate_stats_groupby2.q.out @@ -29,10 +29,12 @@ POSTHOOK: Output: default@location PREHOOK: query: analyze table location compute statistics for columns state, country PREHOOK: type: QUERY PREHOOK: Input: default@location +PREHOOK: Output: default@location #### A masked pattern was here #### POSTHOOK: query: analyze table location compute statistics for columns state, country POSTHOOK: type: QUERY POSTHOOK: Input: default@location +POSTHOOK: Output: default@location #### A masked pattern was here #### PREHOOK: query: explain select state, country from location group by state, country PREHOOK: type: QUERY diff --git 
ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out index a7e73a06c7..9173e7acf1 100644 --- ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -97,26 +97,32 @@ POSTHOOK: Output: default@loc PREHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid PREHOOK: type: QUERY PREHOOK: Input: default@emp +PREHOOK: Output: default@emp #### A masked pattern was here #### POSTHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp +POSTHOOK: Output: default@emp #### A masked pattern was here #### PREHOOK: query: analyze table dept compute statistics for columns deptname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@dept +PREHOOK: Output: default@dept #### A masked pattern was here #### POSTHOOK: query: analyze table dept compute statistics for columns deptname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@dept +POSTHOOK: Output: default@dept #### A masked pattern was here #### PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc +PREHOOK: Output: default@loc #### A masked pattern was here #### POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc +POSTHOOK: Output: default@loc #### A masked pattern was here #### PREHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out index e04c1c6bc5..7d3c0abc61 100644 --- ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out @@ -297,10 +297,12 @@ POSTHOOK: Output: default@store PREHOOK: query: analyze table store compute statistics for columns s_store_sk, s_floor_space PREHOOK: type: QUERY PREHOOK: Input: default@store +PREHOOK: Output: default@store #### A masked pattern was here #### POSTHOOK: query: analyze table store compute statistics for columns s_store_sk, s_floor_space POSTHOOK: type: QUERY POSTHOOK: Input: default@store +POSTHOOK: Output: default@store #### A masked pattern was here #### PREHOOK: query: analyze table store_bigint compute statistics PREHOOK: type: QUERY @@ -313,10 +315,12 @@ POSTHOOK: Output: default@store_bigint PREHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space PREHOOK: type: QUERY PREHOOK: Input: default@store_bigint +PREHOOK: Output: default@store_bigint #### A masked pattern was here #### POSTHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space POSTHOOK: type: QUERY POSTHOOK: Input: default@store_bigint +POSTHOOK: Output: default@store_bigint #### A masked pattern was here #### PREHOOK: query: analyze table store_sales compute statistics PREHOOK: type: QUERY @@ -329,10 +333,12 @@ POSTHOOK: Output: default@store_sales PREHOOK: query: analyze table store_sales compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity PREHOOK: type: QUERY PREHOOK: Input: default@store_sales +PREHOOK: Output: default@store_sales #### A masked pattern was here #### POSTHOOK: query: analyze table store_sales compute statistics for columns ss_store_sk, ss_addr_sk, 
ss_quantity POSTHOOK: type: QUERY POSTHOOK: Input: default@store_sales +POSTHOOK: Output: default@store_sales #### A masked pattern was here #### PREHOOK: query: analyze table customer_address compute statistics PREHOOK: type: QUERY @@ -345,10 +351,12 @@ POSTHOOK: Output: default@customer_address PREHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk PREHOOK: type: QUERY PREHOOK: Input: default@customer_address +PREHOOK: Output: default@customer_address #### A masked pattern was here #### POSTHOOK: query: analyze table customer_address compute statistics for columns ca_address_sk POSTHOOK: type: QUERY POSTHOOK: Input: default@customer_address +POSTHOOK: Output: default@customer_address #### A masked pattern was here #### PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/annotate_stats_limit.q.out ql/src/test/results/clientpositive/annotate_stats_limit.q.out index ea181cb8f6..5139db4d64 100644 --- ql/src/test/results/clientpositive/annotate_stats_limit.q.out +++ ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -55,10 +55,12 @@ POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name PREHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state, locid, zip, year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select * from loc_orc PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index def4d4faee..3a94a6a4e3 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -273,11 +273,15 @@ PREHOOK: query: analyze table loc_orc partition(year='2001') compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc PREHOOK: Input: default@loc_orc@year=2001 +PREHOOK: Output: default@loc_orc +PREHOOK: Output: default@loc_orc@year=2001 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc partition(year='2001') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc POSTHOOK: Input: default@loc_orc@year=2001 +POSTHOOK: Output: default@loc_orc +POSTHOOK: Output: default@loc_orc@year=2001 #### A masked pattern was here #### PREHOOK: query: explain select zip from loc_orc PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/annotate_stats_select.q.out ql/src/test/results/clientpositive/annotate_stats_select.q.out index dec7f402ad..7f402db8b3 100644 --- ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -113,10 +113,12 @@ STAGE PLANS: PREHOOK: query: analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1 PREHOOK: type: QUERY PREHOOK: Input: default@alltypes_orc +PREHOOK: Output: default@alltypes_orc #### A masked pattern was here #### POSTHOOK: query: analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1 POSTHOOK: type: QUERY POSTHOOK: Input: 
default@alltypes_orc +POSTHOOK: Output: default@alltypes_orc #### A masked pattern was here #### PREHOOK: query: explain select * from alltypes_orc PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/annotate_stats_table.q.out ql/src/test/results/clientpositive/annotate_stats_table.q.out index 5d443f1381..e93ce4f36d 100644 --- ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -120,10 +120,12 @@ STAGE PLANS: PREHOOK: query: analyze table emp_orc compute statistics for columns deptid PREHOOK: type: QUERY PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc #### A masked pattern was here #### POSTHOOK: query: analyze table emp_orc compute statistics for columns deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc #### A masked pattern was here #### PREHOOK: query: explain select * from emp_orc PREHOOK: type: QUERY @@ -170,10 +172,12 @@ STAGE PLANS: PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@emp_orc +PREHOOK: Output: default@emp_orc #### A masked pattern was here #### POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc +POSTHOOK: Output: default@emp_orc #### A masked pattern was here #### PREHOOK: query: explain select * from emp_orc PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/annotate_stats_union.q.out ql/src/test/results/clientpositive/annotate_stats_union.q.out index 059f261d97..3b4b169dc8 100644 --- ql/src/test/results/clientpositive/annotate_stats_union.q.out +++ ql/src/test/results/clientpositive/annotate_stats_union.q.out @@ -55,10 +55,12 @@ POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select state from loc_orc PREHOOK: type: QUERY @@ -282,18 +284,22 @@ PREHOOK: query: analyze table loc_staging compute statistics for columns state,l PREHOOK: type: QUERY PREHOOK: Input: test@loc_staging #### A masked pattern was here #### +PREHOOK: Output: test@loc_staging POSTHOOK: query: analyze table loc_staging compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: test@loc_staging #### A masked pattern was here #### +POSTHOOK: Output: test@loc_staging PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: test@loc_orc #### A masked pattern was here #### +PREHOOK: Output: test@loc_orc POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: test@loc_orc #### A masked pattern was here #### +POSTHOOK: Output: test@loc_orc PREHOOK: query: explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp diff --git 
ql/src/test/results/clientpositive/autoColumnStats_1.q.out ql/src/test/results/clientpositive/autoColumnStats_1.q.out index 4cf6df18f9..5f78d88dc2 100644 --- ql/src/test/results/clientpositive/autoColumnStats_1.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_1.q.out @@ -382,6 +382,46 @@ POSTHOOK: query: create table if not exists nzhang_part14 (key string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@nzhang_part14 +PREHOOK: query: desc formatted nzhang_part14 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string + +# Partition Information +# col_name data_type comment + +value string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 0 + numPartitions 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: insert overwrite table nzhang_part14 partition(value) select key, value from ( select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a @@ -407,6 +447,44 @@ POSTHOOK: Output: default@nzhang_part14@value= POSTHOOK: Output: default@nzhang_part14@value=__HIVE_DEFAULT_PARTITION__ POSTHOOK: Lineage: nzhang_part14 PARTITION(value= ).key EXPRESSION [] POSTHOOK: Lineage: nzhang_part14 PARTITION(value=__HIVE_DEFAULT_PARTITION__).key EXPRESSION [] +PREHOOK: query: desc formatted nzhang_part14 partition (value=' ') +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nzhang_part14 +POSTHOOK: query: desc formatted nzhang_part14 partition (value=' ') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nzhang_part14 +# col_name data_type comment + +key string + +# Partition Information +# col_name data_type comment + +value string + +# Detailed Partition Information +Partition Value: [ ] +Database: default +Table: nzhang_part14 +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} + numFiles 1 + numRows 2 + rawDataSize 4 + totalSize 6 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 PREHOOK: query: explain select key from nzhang_part14 PREHOOK: type: QUERY POSTHOOK: query: explain select key from nzhang_part14 @@ -477,11 +555,11 @@ STAGE PLANS: Processor Tree: TableScan alias: nzhang_part14 - Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 946 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: create table alter5 ( col1 string ) partitioned by (dt string) @@ -958,6 +1036,12 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 0 + numPartitions 0 + numRows 0 + rawDataSize 0 + totalSize 0 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/autoColumnStats_2.q.out ql/src/test/results/clientpositive/autoColumnStats_2.q.out index 791e6ae2fd..169d0f04c3 100644 --- ql/src/test/results/clientpositive/autoColumnStats_2.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_2.q.out @@ -125,18 +125,20 @@ PREHOOK: Input: default@a POSTHOOK: query: describe formatted a key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@a -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 205 2.812 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 309 2.812 3 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b key PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 205 2.812 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 309 2.812 3 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: from src insert overwrite table a select * insert into table b select * @@ -231,18 +233,20 @@ PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 0 205 2.812 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 0 309 2.812 3 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b value PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 0 214 6.812 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 0 309 6.812 7 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: insert into table b select NULL, NULL from src limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -251,26 +255,28 @@ POSTHOOK: query: insert into table b select NULL, NULL from src limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: 
default@b -POSTHOOK: Lineage: b.key SIMPLE [] -POSTHOOK: Lineage: b.value SIMPLE [] +POSTHOOK: Lineage: b.key EXPRESSION [] +POSTHOOK: Lineage: b.value EXPRESSION [] PREHOOK: query: describe formatted b key PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 10 205 2.812 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 10 309 2.812 3 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b value PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 10 214 6.812 7 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 10 309 6.812 7 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: insert into table b(value) select key+100000 from src limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -287,18 +293,20 @@ PREHOOK: Input: default@b POSTHOOK: query: describe formatted b key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -key string 20 205 2.812 3 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +key string 20 309 2.812 3 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: describe formatted b value PREHOOK: type: DESCTABLE PREHOOK: Input: default@b POSTHOOK: query: describe formatted b value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@b -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment - -value string 10 214 8.0 8 from deserializer +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector + +value string 10 319 8.0 8 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} PREHOOK: query: drop table src_multi2 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table src_multi2 @@ -467,11 +475,11 @@ STAGE PLANS: Processor Tree: TableScan alias: nzhang_part14 - Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11 Data size: 946 Basic stats: COMPLETE Column stats: PARTIAL ListSink PREHOOK: query: drop table alter5 @@ -514,7 +522,7 @@ Database: default Table: alter5 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\"}} numFiles 0 numRows 0 rawDataSize 0 @@ -687,7 +695,6 @@ Database: default Table: alter5 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col1\":\"true\"}} numFiles 1 totalSize 1906 #### A masked pattern was here #### @@ -1079,6 +1086,12 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + numFiles 0 + numPartitions 0 + numRows 0 + rawDataSize 0 + totalSize 0 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/autoColumnStats_3.q.out ql/src/test/results/clientpositive/autoColumnStats_3.q.out index 2f70095b7a..3a23c94b13 100644 --- ql/src/test/results/clientpositive/autoColumnStats_3.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_3.q.out @@ -13,10 +13,12 @@ POSTHOOK: Output: default@src_multi1 PREHOOK: query: analyze table src_multi1 compute statistics for columns key PREHOOK: type: QUERY PREHOOK: Input: default@src_multi1 +PREHOOK: Output: default@src_multi1 #### A masked pattern was here #### POSTHOOK: query: analyze table src_multi1 compute statistics for columns key POSTHOOK: type: QUERY POSTHOOK: Input: default@src_multi1 +POSTHOOK: Output: default@src_multi1 #### A masked pattern was here #### PREHOOK: query: describe formatted src_multi1 PREHOOK: type: DESCTABLE @@ -228,11 +230,15 @@ PREHOOK: query: analyze table nzhang_part14 partition(ds='1', hr='3') compute st PREHOOK: type: QUERY PREHOOK: Input: default@nzhang_part14 PREHOOK: Input: default@nzhang_part14@ds=1/hr=3 +PREHOOK: Output: default@nzhang_part14 +PREHOOK: Output: default@nzhang_part14@ds=1/hr=3 #### A masked pattern was here #### POSTHOOK: query: analyze table nzhang_part14 partition(ds='1', hr='3') compute statistics for columns value POSTHOOK: type: QUERY POSTHOOK: Input: default@nzhang_part14 POSTHOOK: Input: default@nzhang_part14@ds=1/hr=3 +POSTHOOK: Output: default@nzhang_part14 +POSTHOOK: Output: default@nzhang_part14@ds=1/hr=3 #### A masked pattern was here #### PREHOOK: query: desc formatted nzhang_part14 partition(ds='1', hr='3') PREHOOK: type: DESCTABLE @@ -367,7 +373,7 @@ Database: default Table: nzhang_part14 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"value\":\"true\"}} numFiles 2 numRows 4 rawDataSize 12 @@ -407,7 +413,7 @@ Database: default Table: nzhang_part14 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} numFiles 2 numRows 4 rawDataSize 16 diff --git ql/src/test/results/clientpositive/autoColumnStats_4.q.out ql/src/test/results/clientpositive/autoColumnStats_4.q.out index a0581f8d8a..183ef06d4a 100644 --- ql/src/test/results/clientpositive/autoColumnStats_4.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_4.q.out @@ -50,8 +50,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-3, Stage-4 + Stage-3 depends on stages: Stage-0, Stage-4 Stage-4 depends on stages: Stage-2 STAGE PLANS: @@ -138,10 +137,8 @@ STAGE PLANS: name: 
default.acid_dtt Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-5 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: a, b Column Types: int, varchar(128) @@ -197,7 +194,6 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} numFiles 2 numRows 0 rawDataSize 0 diff --git ql/src/test/results/clientpositive/autoColumnStats_5.q.out ql/src/test/results/clientpositive/autoColumnStats_5.q.out index 1298d989a2..36a94df3ef 100644 --- ql/src/test/results/clientpositive/autoColumnStats_5.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_5.q.out @@ -17,7 +17,6 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 - Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -98,10 +97,8 @@ STAGE PLANS: name: default.partitioned1 Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-8 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: a, b Column Types: int, string @@ -255,7 +252,6 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 - Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -336,10 +332,8 @@ STAGE PLANS: name: default.partitioned1 Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-8 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: a, b, c, d Column Types: int, string, int, string @@ -449,7 +443,6 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 - Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -530,10 +523,8 @@ STAGE PLANS: name: default.partitioned1 Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-8 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: a, b, c, d Column Types: int, string, int, string @@ -603,7 +594,7 @@ Database: default Table: partitioned1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\",\"d\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} numFiles 2 numRows 6 rawDataSize 78 @@ -637,6 +628,6 @@ POSTHOOK: query: desc formatted partitioned1 partition(part=1) c POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partitioned1 col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitvector -# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector +# col_name data_type comment -c int 100 200 0 2 HL from deserializer +c int from deserializer diff --git ql/src/test/results/clientpositive/autoColumnStats_6.q.out ql/src/test/results/clientpositive/autoColumnStats_6.q.out index c4ab489b39..70788fde9f 100644 --- ql/src/test/results/clientpositive/autoColumnStats_6.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_6.q.out @@ -30,7 +30,6 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 - Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -113,10 +112,8 @@ STAGE PLANS: name: default.orcfile_merge2a Stage: Stage-2 - 
Stats-Aggr Operator - - Stage: Stage-8 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key, value Column Types: int, string diff --git ql/src/test/results/clientpositive/autoColumnStats_7.q.out ql/src/test/results/clientpositive/autoColumnStats_7.q.out index 2dc9fc2d42..acea69e484 100644 --- ql/src/test/results/clientpositive/autoColumnStats_7.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_7.q.out @@ -26,8 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-6 depends on stages: Stage-3, Stage-5 + Stage-3 depends on stages: Stage-0, Stage-5 Stage-4 depends on stages: Stage-2 Stage-5 depends on stages: Stage-4 @@ -112,10 +111,8 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-3 - Stats-Aggr Operator - - Stage: Stage-6 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key, c1, c2 Column Types: string, int, string diff --git ql/src/test/results/clientpositive/autoColumnStats_8.q.out ql/src/test/results/clientpositive/autoColumnStats_8.q.out index c913d97fe5..3afd07a560 100644 --- ql/src/test/results/clientpositive/autoColumnStats_8.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_8.q.out @@ -48,10 +48,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-6 depends on stages: Stage-3, Stage-4, Stage-5 - Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-1, Stage-5 Stage-5 depends on stages: Stage-2 STAGE PLANS: @@ -448,25 +446,10 @@ STAGE PLANS: name: default.nzhang_part8 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### - Stage: Stage-6 - Column Stats Work - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.nzhang_part8 - Is Table Level Stats: false - - Stage: Stage-7 - Column Stats Work - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.nzhang_part8 - Is Table Level Stats: false - Stage: Stage-1 Move Operator tables: @@ -496,8 +479,14 @@ STAGE PLANS: name: default.nzhang_part8 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false Stage: Stage-5 Map Reduce diff --git ql/src/test/results/clientpositive/autoColumnStats_9.q.out ql/src/test/results/clientpositive/autoColumnStats_9.q.out index fda71e0dd2..78b3316a11 100644 --- ql/src/test/results/clientpositive/autoColumnStats_9.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_9.q.out @@ -20,8 +20,7 @@ STAGE DEPENDENCIES: Stage-7 Stage-5 depends on stages: Stage-7 Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 depends on stages: Stage-2, Stage-3 + Stage-2 depends on stages: Stage-0, Stage-3 Stage-3 depends on stages: Stage-5 STAGE PLANS: @@ -166,10 +165,8 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-8 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key, value Column Types: int, string diff --git ql/src/test/results/clientpositive/auto_join1.q.out ql/src/test/results/clientpositive/auto_join1.q.out index 5f4bb7452a..2edfb864e8 
100644 --- ql/src/test/results/clientpositive/auto_join1.q.out +++ ql/src/test/results/clientpositive/auto_join1.q.out @@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +45,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +78,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -91,7 +107,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/auto_join14.q.out ql/src/test/results/clientpositive/auto_join14.q.out index 1dd677c3d6..52aaa023c5 100644 --- ql/src/test/results/clientpositive/auto_join14.q.out +++ ql/src/test/results/clientpositive/auto_join14.q.out @@ -15,13 +15,14 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + 
Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src @@ -44,7 +45,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +78,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -91,7 +107,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value diff --git ql/src/test/results/clientpositive/auto_join17.q.out ql/src/test/results/clientpositive/auto_join17.q.out index d39c36eac3..20adc30831 100644 --- ql/src/test/results/clientpositive/auto_join17.q.out +++ ql/src/test/results/clientpositive/auto_join17.q.out @@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +45,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ 
-77,6 +78,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -91,7 +107,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* diff --git ql/src/test/results/clientpositive/auto_join19.q.out ql/src/test/results/clientpositive/auto_join19.q.out index 3f70055d81..80615deddc 100644 --- ql/src/test/results/clientpositive/auto_join19.q.out +++ ql/src/test/results/clientpositive/auto_join19.q.out @@ -17,13 +17,14 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +80,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,7 +109,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/auto_join19_inclause.q.out ql/src/test/results/clientpositive/auto_join19_inclause.q.out index 3f70055d81..80615deddc 100644 --- ql/src/test/results/clientpositive/auto_join19_inclause.q.out +++ ql/src/test/results/clientpositive/auto_join19_inclause.q.out @@ -17,13 +17,14 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +80,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,7 +109,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/auto_join2.q.out ql/src/test/results/clientpositive/auto_join2.q.out index b17d344985..63fc65a7b6 100644 --- ql/src/test/results/clientpositive/auto_join2.q.out +++ ql/src/test/results/clientpositive/auto_join2.q.out @@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -62,7 +63,7 @@ STAGE PLANS: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -103,6 +104,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map 
Reduce Local Work @@ -117,7 +133,34 @@ STAGE PLANS: name: default.dest_j2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value diff --git ql/src/test/results/clientpositive/auto_join25.q.out ql/src/test/results/clientpositive/auto_join25.q.out index 534bdb6ff0..14f199bf60 100644 --- ql/src/test/results/clientpositive/auto_join25.q.out +++ ql/src/test/results/clientpositive/auto_join25.q.out @@ -33,11 +33,12 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL RUN: Stage-1:MAPRED RUN: Stage-0:MOVE -RUN: Stage-2:STATS +RUN: Stage-3:MAPRED +RUN: Stage-2:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -73,14 +74,15 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j2 POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-11:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL +RUN: Stage-12:CONDITIONAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-8:CONDITIONAL -RUN: Stage-12:MAPREDLOCAL +RUN: Stage-9:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL RUN: Stage-2:MAPRED RUN: Stage-0:MOVE -RUN: Stage-3:STATS +RUN: Stage-4:MAPRED +RUN: Stage-3:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j2 @@ -114,11 +116,12 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL RUN: Stage-1:MAPRED RUN: Stage-0:MOVE -RUN: Stage-2:STATS +RUN: Stage-3:MAPRED +RUN: Stage-2:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 PREHOOK: type: QUERY PREHOOK: Input: 
default@dest_j1 diff --git ql/src/test/results/clientpositive/auto_join26.q.out ql/src/test/results/clientpositive/auto_join26.q.out index e6d966ffad..91d79857c2 100644 --- ql/src/test/results/clientpositive/auto_join26.q.out +++ ql/src/test/results/clientpositive/auto_join26.q.out @@ -15,13 +15,14 @@ INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -110,7 +126,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key diff --git ql/src/test/results/clientpositive/auto_join3.q.out ql/src/test/results/clientpositive/auto_join3.q.out index 35e8273766..01ba7f662a 100644 --- ql/src/test/results/clientpositive/auto_join3.q.out +++ ql/src/test/results/clientpositive/auto_join3.q.out @@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-7 is a root stage + Stage-6 depends on stages: 
Stage-7 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -64,7 +65,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -99,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -113,7 +129,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value diff --git ql/src/test/results/clientpositive/auto_join4.q.out ql/src/test/results/clientpositive/auto_join4.q.out index 6c0fccdb6d..62cb55c02f 100644 --- ql/src/test/results/clientpositive/auto_join4.q.out +++ ql/src/test/results/clientpositive/auto_join4.q.out @@ -37,13 +37,14 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -66,7 +67,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -99,6 +100,21 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -113,7 +129,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/auto_join5.q.out ql/src/test/results/clientpositive/auto_join5.q.out index 410306175a..03f7ae4601 100644 --- ql/src/test/results/clientpositive/auto_join5.q.out +++ ql/src/test/results/clientpositive/auto_join5.q.out @@ -37,13 +37,14 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -66,7 +67,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -99,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + 
Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -113,7 +129,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/auto_join6.q.out ql/src/test/results/clientpositive/auto_join6.q.out index 53caf7d25e..7547201a50 100644 --- ql/src/test/results/clientpositive/auto_join6.q.out +++ ql/src/test/results/clientpositive/auto_join6.q.out @@ -39,7 +39,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -110,7 +126,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: 
Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/auto_join7.q.out ql/src/test/results/clientpositive/auto_join7.q.out index a657c301ee..a7b4000d3b 100644 --- ql/src/test/results/clientpositive/auto_join7.q.out +++ ql/src/test/results/clientpositive/auto_join7.q.out @@ -49,7 +49,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -126,6 +127,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -138,7 +154,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/auto_join8.q.out ql/src/test/results/clientpositive/auto_join8.q.out index 2689578135..e190f09314 100644 --- ql/src/test/results/clientpositive/auto_join8.q.out +++ ql/src/test/results/clientpositive/auto_join8.q.out @@ -37,13 +37,14 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -66,7 +67,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -102,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -116,7 +132,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/auto_join9.q.out ql/src/test/results/clientpositive/auto_join9.q.out index d7d7d181f0..b4aa03ea1a 100644 --- ql/src/test/results/clientpositive/auto_join9.q.out +++ ql/src/test/results/clientpositive/auto_join9.q.out @@ -15,13 +15,14 @@ FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +45,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +78,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -91,7 +107,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' diff --git ql/src/test/results/clientpositive/auto_join_reordering_values.q.out ql/src/test/results/clientpositive/auto_join_reordering_values.q.out index ba8d16c0fc..a7508c287b 100644 --- ql/src/test/results/clientpositive/auto_join_reordering_values.q.out +++ 
ql/src/test/results/clientpositive/auto_join_reordering_values.q.out @@ -141,7 +141,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -162,7 +162,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -277,7 +277,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -298,7 +298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -414,7 +414,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -435,7 +435,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -551,7 +551,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid @@ -572,7 +572,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out 
ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out index 8c7658c447..ac143ddb9d 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out @@ -69,9 +69,10 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-1, Stage-5 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -104,6 +105,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -114,6 +125,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -126,7 +161,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -139,7 +179,31 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -246,9 +310,10 @@ POSTHOOK: type: QUERY 
STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-1, Stage-5 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -281,6 +346,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -291,6 +366,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -303,7 +402,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -316,7 +420,31 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -423,9 +551,10 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-1, 
Stage-5 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -458,6 +587,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -468,6 +607,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -480,7 +643,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -493,7 +661,31 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out index b53e6704cc..ff2cd8cd0f 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out @@ -50,6 +50,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -93,7 +94,7 @@ STAGE PLANS: partition values: ds 1 properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -172,6 +173,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -202,8 +229,86 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: test_db_smb_mapjoin_11.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + 
columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY @@ -1827,7 +1932,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1936,7 +2041,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out index 9af4683451..826e24b653 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out @@ -129,7 +129,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -204,8 +204,14 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: test_db_smb_mapjoin_12.test_table3 + Is Table Level Stats: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY @@ -298,7 +304,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -409,8 +415,14 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: test_db_smb_mapjoin_12.test_table3 + Is Table Level Stats: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out index 49ff6355b3..3bc00c8973 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out @@ -112,7 +112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,7 +135,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -292,7 +292,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -315,7 +315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out index 82f5804eea..13fcdf04d2 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out @@ -648,6 +648,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -669,7 +690,12 @@ STAGE PLANS: name: default.smb_join_results Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: test_db_smb_mapjoin_7.smb_join_results Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/binary_output_format.q.out ql/src/test/results/clientpositive/binary_output_format.q.out index ddb6adf4a0..2f72ae9769 100644 --- ql/src/test/results/clientpositive/binary_output_format.q.out +++ ql/src/test/results/clientpositive/binary_output_format.q.out @@ -117,6 +117,22 @@ STAGE 
PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string) + outputColumnNames: mydata + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(mydata, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -168,6 +184,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -208,8 +253,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: mydata + Column Types: string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/bucket1.q.out ql/src/test/results/clientpositive/bucket1.q.out index 1d204731cd..e3ed750f77 100644 --- ql/src/test/results/clientpositive/bucket1.q.out +++ ql/src/test/results/clientpositive/bucket1.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -129,6 +130,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -160,8 +188,83 @@ STAGE PLANS: name: default.bucket1_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket1_1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucket1_1 select * from src diff --git ql/src/test/results/clientpositive/bucket2.q.out ql/src/test/results/clientpositive/bucket2.q.out index 48ccafb746..c57769e266 100644 --- ql/src/test/results/clientpositive/bucket2.q.out +++ ql/src/test/results/clientpositive/bucket2.q.out @@ -129,6 +129,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -160,8 +195,14 @@ STAGE PLANS: name: default.bucket2_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket2_1 select * from src diff --git ql/src/test/results/clientpositive/bucket3.q.out ql/src/test/results/clientpositive/bucket3.q.out index b1173e7b35..a2109ee208 100644 --- ql/src/test/results/clientpositive/bucket3.q.out +++ ql/src/test/results/clientpositive/bucket3.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -127,6 +128,34 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -157,8 +186,90 @@ STAGE PLANS: name: default.bucket3_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + 
Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src diff --git ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out index 70cd53c6e5..3545f865a2 100644 --- ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out @@ -117,13 +117,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: 
Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -197,7 +198,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -257,6 +258,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -390,8 +418,83 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value @@ -451,13 +554,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -531,7 +635,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -570,7 +674,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -591,6 +695,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -704,7 +835,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -724,8 +855,83 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out index d0c3a1aca0..71a9f2fdba 100644 --- ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out @@ -101,13 +101,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - 
Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -181,7 +182,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +242,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -374,8 +402,83 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator 
Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value @@ -435,13 +538,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -515,7 +619,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -554,7 +658,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -575,6 +679,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -688,7 +819,7 @@ STAGE PLANS: 
input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -708,8 +839,83 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out index eed4a5a970..eb9b1d537c 100644 --- ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out +++ 
ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out @@ -101,13 +101,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -181,7 +182,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +242,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -374,8 +402,83 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value @@ -435,13 +538,14 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -515,7 +619,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -554,7 +658,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -575,6 +679,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -688,7 +819,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -708,8 +839,83 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value 
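The pattern repeated throughout these plan updates is the one enabled by turning hive.stats.column.autogather on by default: each INSERT gains a map-side Group By Operator that evaluates compute_stats(col, 'hll') in mode: hash to produce partial per-column statistics, a reduce-side Group By Operator in mode: mergepartial that merges those partials, and a Stats Work stage (replacing the old Stats-Aggr Operator) whose Column Stats Desc persists the result and flips the per-column flags in COLUMN_STATS_ACCURATE, e.g. {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}. The following is a minimal toy Java analogue of that two-phase aggregation, not Hive's implementation: the class and field names are illustrative only, and an exact distinct-set stands in for the 'hll' NDV estimator that Hive actually serializes as a bitvector.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

// Toy analogue of the two-phase column-stats computation seen in the plans
// above. ColumnStatsPartial is a hypothetical name, not a Hive class.
public class ColumnStatsSketch {

  // What the map-side "mode: hash" Group By emits per column: a partial
  // state that a later reducer can merge ("mode: mergepartial").
  static final class ColumnStatsPartial {
    long count;      // non-null values seen
    long numNulls;
    long maxLength;
    // Hive's 'hll' keeps an approximate-NDV bitvector; an exact set keeps
    // this sketch simple.
    final HashSet<String> distinct = new HashSet<>();

    void update(String v) {
      if (v == null) { numNulls++; return; }
      count++;
      maxLength = Math.max(maxLength, v.length());
      distinct.add(v);
    }

    void merge(ColumnStatsPartial other) {
      count += other.count;
      numNulls += other.numNulls;
      maxLength = Math.max(maxLength, other.maxLength);
      distinct.addAll(other.distinct);
    }
  }

  // Map phase: one partial per input split, as in
  // "aggregations: compute_stats(key, 'hll') ... mode: hash".
  static ColumnStatsPartial mapPhase(List<String> split) {
    ColumnStatsPartial p = new ColumnStatsPartial();
    for (String v : split) {
      p.update(v);
    }
    return p;
  }

  // Reduce phase: merge all partials, as in
  // "aggregations: compute_stats(VALUE._col0) ... mode: mergepartial".
  static ColumnStatsPartial reducePhase(List<ColumnStatsPartial> partials) {
    ColumnStatsPartial merged = new ColumnStatsPartial();
    for (ColumnStatsPartial p : partials) {
      merged.merge(p);
    }
    return merged;
  }

  public static void main(String[] args) {
    // Two "splits" of a single string column, as two map tasks would see them.
    List<String> split1 = List.of("a", "bb", "a");
    List<String> split2 = new ArrayList<>();
    split2.add("ccc");
    split2.add(null); // List.of rejects nulls, so build this split by hand

    List<ColumnStatsPartial> partials =
        List.of(mapPhase(split1), mapPhase(split2));
    ColumnStatsPartial stats = reducePhase(partials);

    // The Stats Work stage would persist the merged stats and mark the
    // column accurate; a flat map stands in for the COLUMN_STATS_ACCURATE
    // JSON shown in the diffs above.
    Map<String, String> columnStatsAccurate = new HashMap<>();
    columnStatsAccurate.put("BASIC_STATS", "true");
    columnStatsAccurate.put("COLUMN_STATS.key", "true");

    System.out.printf("count=%d nulls=%d maxLen=%d ndv=%d accurate=%s%n",
        stats.count, stats.numNulls, stats.maxLength, stats.distinct.size(),
        columnStatsAccurate);
  }
}

The partitioned-insert plans below extend the same pattern by one step: the map-side Group By adds "keys: ds (type: string)", so the partials are shuffled per partition value and the mergepartial reducer produces one stats row per partition rather than a single table-level row (hence "Is Table Level Stats: true" appearing only in the unpartitioned cases).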
diff --git ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out index 5743944b4c..688fdfa125 100644 --- ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out +++ ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out @@ -189,7 +189,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -212,7 +212,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -239,7 +239,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -262,7 +262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -289,7 +289,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -312,7 +312,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -545,7 +545,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -568,7 +568,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -618,7 +618,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -645,7 +645,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -668,7 +668,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key diff --git ql/src/test/results/clientpositive/bucketmapjoin13.q.out ql/src/test/results/clientpositive/bucketmapjoin13.q.out index 71b2924af2..406cca455a 100644 --- ql/src/test/results/clientpositive/bucketmapjoin13.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin13.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -192,7 +192,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name value column.name.delimiter , @@ -241,7 +241,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -371,7 +371,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -480,7 +480,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -615,7 +615,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -724,7 +724,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -859,7 +859,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -968,7 +968,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/bucketmapjoin5.q.out ql/src/test/results/clientpositive/bucketmapjoin5.q.out index 4b989932ce..daa6b13497 100644 --- ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -272,6 +272,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -376,6 +392,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -415,8 +460,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -792,7 +843,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -813,6 +864,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -917,6 +984,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -936,7 +1032,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -956,8 +1052,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -973,7 +1075,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1003,7 +1105,7 @@ STAGE 
PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1024,7 +1126,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1060,7 +1162,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1090,7 +1192,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1111,7 +1213,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index 97cb1f10d3..8e2ff19e25 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -212,6 +212,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -265,6 +281,35 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -304,8 +349,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index 724df736ee..fb873d7c78 100644 --- ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -275,6 +275,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -328,6 +344,35 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -367,8 +412,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out index 165f0dc1e5..b59c4bc90a 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out @@ -82,7 +82,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from @@ -187,7 +192,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * from @@ -292,7 +302,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') @@ -311,7 +326,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -343,6 +359,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -357,7 +389,41 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') @@ -411,5 +477,10 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out index c5e03be100..52ef3dbb29 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out @@ -78,7 +78,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.value, x.key from @@ -150,7 +155,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -182,6 +188,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), '1' (type: string) + outputColumnNames: value, key, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -196,7 +218,41 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + 
sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out index 1d794c3d28..eaf85c384e 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out @@ -65,39 +65,13 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -106,7 +80,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -121,8 +95,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -134,6 +106,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 
(type: int), _col2 (type: string) + outputColumnNames: key, key2, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -148,100 +134,37 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 - Stage: Stage-5 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) - Reduce Operator Tree: - Select Operator - 
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key, concat(a.value, b.value) @@ -339,43 +262,13 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -388,7 +281,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -400,8 +293,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) @@ -413,6 +304,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + 
keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -427,106 +332,37 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce 
Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.value diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out index 1e70105b9b..661114d3a6 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out @@ -65,43 +65,13 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -114,7 +84,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -129,8 +99,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -142,6 +110,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -156,112 +138,37 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 
0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -346,43 +253,13 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 - Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:test_table2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:test_table2 - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -395,7 +272,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -410,8 +287,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator 
Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -423,6 +298,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -437,112 +326,37 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:test_table1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:test_table1 - TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select 
Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out index f0e77f00af..361eead889 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out @@ -156,7 +156,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 Stage: Stage-8 Map Reduce Local Work @@ -434,7 +439,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 Stage: Stage-8 Map Reduce Local Work diff --git ql/src/test/results/clientpositive/case_sensitivity.q.out ql/src/test/results/clientpositive/case_sensitivity.q.out index b3969ccf90..c77994d4cb 100644 --- ql/src/test/results/clientpositive/case_sensitivity.q.out +++ ql/src/test/results/clientpositive/case_sensitivity.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 837 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), 
compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,7 +93,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/cast1.q.out ql/src/test/results/clientpositive/cast1.q.out index 9feb14f1bb..9c23a76e47 100644 --- ql/src/test/results/clientpositive/cast1.q.out +++ ql/src/test/results/clientpositive/cast1.q.out @@ -44,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: string), _col6 (type: int) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -65,7 +91,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column 
Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7 + Column Types: int, double, double, double, int, string, int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index 88b5d8464f..36cd5fbf4b 100644 --- ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -66,20 +66,22 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: state, locid, zip, year - Statistics: Num rows: 8 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc compute statistics for columns state PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select a, c, min(b) from ( select state as a, locid as b, count(*) as c @@ -106,22 +108,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sq1:loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,13 +131,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: state, locid, $f2 - Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(locid) keys: state (type: string), $f2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -151,7 +153,7 @@ STAGE PLANS: key expressions: _col0 (type: 
string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +161,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, $f2, $f2_0 - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -177,10 +179,12 @@ STAGE PLANS: PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select year from loc_orc group by year PREHOOK: type: QUERY @@ -767,30 +771,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, zip - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index afc4fd90f9..7fddda8d73 
100644 --- ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -49,10 +49,12 @@ POSTHOOK: Output: default@tbl1 PREHOOK: query: analyze table tbl1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 +PREHOOK: Output: default@tbl1 #### A masked pattern was here #### POSTHOOK: query: analyze table tbl1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 +POSTHOOK: Output: default@tbl1 #### A masked pattern was here #### PREHOOK: query: analyze table tbl2 compute statistics PREHOOK: type: QUERY @@ -65,10 +67,12 @@ POSTHOOK: Output: default@tbl2 PREHOOK: query: analyze table tbl2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@tbl2 #### A masked pattern was here #### POSTHOOK: query: analyze table tbl2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@tbl2 #### A masked pattern was here #### PREHOOK: query: explain select count(*) from ( diff --git ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out index b29628038c..5fbc878e88 100644 --- ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out +++ ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out @@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -44,7 +45,7 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +78,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -91,7 +107,38 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* diff --git ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out index d4d70bc35e..68628fe345 100644 --- ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out @@ -21,7 +21,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -77,7 +93,38 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), 
compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 @@ -131,7 +178,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -175,6 +223,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -187,7 +250,38 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out index c09764c156..bece89f0d5 100644 --- ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -74,6 +74,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -86,7 +106,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/colstats_all_nulls.q.out ql/src/test/results/clientpositive/colstats_all_nulls.q.out index 9efab75d72..adc06666bd 100644 --- ql/src/test/results/clientpositive/colstats_all_nulls.q.out +++ ql/src/test/results/clientpositive/colstats_all_nulls.q.out @@ -30,10 +30,12 @@ POSTHOOK: Lineage: all_nulls.c SIMPLE [] PREHOOK: query: analyze table all_nulls compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@all_nulls +PREHOOK: Output: default@all_nulls #### A masked pattern was here #### POSTHOOK: query: analyze table all_nulls compute 
statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@all_nulls +POSTHOOK: Output: default@all_nulls #### A masked pattern was here #### PREHOOK: query: describe formatted all_nulls a PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out index c48042227e..37f855df5b 100644 --- ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out +++ ql/src/test/results/clientpositive/column_pruner_multiple_children.q.out @@ -27,8 +27,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - Stage-4 depends on stages: Stage-2, Stage-3 + Stage-2 depends on stages: Stage-0, Stage-3 Stage-3 depends on stages: Stage-1 STAGE PLANS: @@ -89,10 +88,8 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-4 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key, value Column Types: int, string diff --git ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out index 018e18fa9b..feac97e8eb 100644 --- ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out +++ ql/src/test/results/clientpositive/columnarserde_create_shortcut.q.out @@ -60,7 +60,8 @@ STAGE PLANS: name: default.columnarserde_create_shortcut Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM src_thrift INSERT OVERWRITE TABLE columnarserde_create_shortcut SELECT src_thrift.lint, src_thrift.lstring, src_thrift.mstringstring, src_thrift.aint, src_thrift.astring DISTRIBUTE BY 1 diff --git ql/src/test/results/clientpositive/columnstats_infinity.q.out ql/src/test/results/clientpositive/columnstats_infinity.q.out index 23ca4869a5..a1ec9b205a 100644 --- ql/src/test/results/clientpositive/columnstats_infinity.q.out +++ ql/src/test/results/clientpositive/columnstats_infinity.q.out @@ -118,10 +118,12 @@ Storage Desc Params: PREHOOK: query: analyze table table_change_numeric_group_string_group_floating_string_group compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@table_change_numeric_group_string_group_floating_string_group +PREHOOK: Output: default@table_change_numeric_group_string_group_floating_string_group #### A masked pattern was here #### POSTHOOK: query: analyze table table_change_numeric_group_string_group_floating_string_group compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@table_change_numeric_group_string_group_floating_string_group +POSTHOOK: Output: default@table_change_numeric_group_string_group_floating_string_group #### A masked pattern was here #### PREHOOK: query: desc formatted table_change_numeric_group_string_group_floating_string_group PREHOOK: type: DESCTABLE @@ -276,7 +278,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c1\":\"true\",\"c10\":\"true\",\"c12\":\"true\",\"c13\":\"true\",\"c15\":\"true\",\"c3\":\"true\",\"c4\":\"true\",\"c6\":\"true\",\"c7\":\"true\",\"c9\":\"true\",\"insert_num\":\"true\"}} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c1\":\"true\",\"c10\":\"true\",\"c11\":\"true\",\"c12\":\"true\",\"c13\":\"true\",\"c14\":\"true\",\"c15\":\"true\",\"c2\":\"true\",\"c3\":\"true\",\"c4\":\"true\",\"c5\":\"true\",\"c6\":\"true\",\"c7\":\"true\",\"c8\":\"true\",\"c9\":\"true\",\"insert_num\":\"true\"}} numFiles 1 numRows 5 rawDataSize 1250 diff --git ql/src/test/results/clientpositive/columnstats_partlvl.q.out ql/src/test/results/clientpositive/columnstats_partlvl.q.out index 1e1c55d4bf..b6075774f4 100644 --- ql/src/test/results/clientpositive/columnstats_partlvl.q.out +++ ql/src/test/results/clientpositive/columnstats_partlvl.q.out @@ -83,7 +83,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: employeeID Column Types: int @@ -106,7 +107,8 @@ STAGE PLANS: TableScan alias: employee_part Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.employee_part/ + GatherStats: true Select Operator expressions: employeeid (type: int) outputColumnNames: employeeid @@ -215,7 +217,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.employee_part/ Column Stats Desc: Columns: employeeID Column Types: int @@ -226,11 +230,15 @@ PREHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) co PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns employeeID POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 #### A masked pattern was here #### PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID @@ -285,7 +293,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: employeeID Column Types: int @@ -308,7 +317,8 @@ STAGE PLANS: TableScan alias: employee_part Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.employee_part/ + GatherStats: true Select Operator expressions: employeeid (type: int) outputColumnNames: employeeid @@ -417,7 +427,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.employee_part/ Column Stats Desc: Columns: employeeID Column Types: int @@ -428,11 +440,15 @@ PREHOOK: query: analyze table Employee_Part partition (employeeSalary=4000.0) co PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics 
for columns employeeID POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: explain analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns @@ -450,22 +466,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int), employeename (type: string) outputColumnNames: employeeid, employeename - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') keys: 2000.0 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 2000.0 (type: double) sort order: + Map-reduce partition columns: 2000.0 (type: double) - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator @@ -473,21 +489,22 @@ STAGE PLANS: keys: 2000.0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: struct), _col2 (type: struct), 2000.0 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 42 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: employeeid, employeename Column Types: int, string @@ -497,11 +514,15 @@ PREHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) co PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 #### A masked pattern was here #### PREHOOK: 
query: describe formatted Employee_Part partition (employeeSalary=2000.0) employeeID PREHOOK: type: DESCTABLE @@ -537,22 +558,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int), employeename (type: string), employeesalary (type: double) outputColumnNames: employeeid, employeename, employeesalary - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') keys: employeesalary (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: Group By Operator @@ -560,21 +581,22 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: employeeid, employeename Column Types: int, string @@ -585,12 +607,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition(employeeSalary=2000.0) employeeID PREHOOK: type: 
DESCTABLE @@ -626,11 +654,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int), employeename (type: string) outputColumnNames: employeeid, employeename - Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 184 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') mode: hash @@ -655,7 +683,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: employeeid, employeename Column Types: int, string @@ -666,12 +695,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part employeeID PREHOOK: type: DESCTABLE @@ -699,11 +734,16 @@ PREHOOK: query: analyze table default.Employee_Part partition (employeeSalary=20 PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 #### A masked pattern was here #### +Cannot get table employee_part POSTHOOK: query: analyze table default.Employee_Part partition (employeeSalary=2000.0) compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 #### A masked pattern was here #### PREHOOK: query: describe formatted default.Employee_Part partition (employeeSalary=2000.0) employeeID PREHOOK: type: DESCTABLE @@ -720,12 +760,19 @@ PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0 PREHOOK: Input: default@employee_part@employeesalary=4000.0 +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0 +PREHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### +Cannot get table employee_part POSTHOOK: query: analyze table default.Employee_Part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0 POSTHOOK: Input: default@employee_part@employeesalary=4000.0 +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0 
+POSTHOOK: Output: default@employee_part@employeesalary=4000.0 #### A masked pattern was here #### PREHOOK: query: use default PREHOOK: type: SWITCHDATABASE diff --git ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out index 0739b497f6..477b89e021 100644 --- ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out +++ ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out @@ -119,7 +119,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: employeeName, employeeID Column Types: string, int @@ -129,11 +130,15 @@ PREHOOK: query: analyze table Employee_Part partition (employeeSalary='4000.0', PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary='4000.0', country) compute statistics for columns employeeName, employeeID POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='4000.0', country='USA') employeeName PREHOOK: type: DESCTABLE @@ -197,7 +202,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: employeeID Column Types: int @@ -208,12 +214,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@employee_part PREHOOK: Input: default@employee_part@employeesalary=2000.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary='2000.0') compute statistics for columns employeeID POSTHOOK: type: QUERY POSTHOOK: Input: default@employee_part POSTHOOK: Input: default@employee_part@employeesalary=2000.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='2000.0', country='USA') employeeID PREHOOK: type: DESCTABLE @@ -249,22 +261,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part - Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: employeeid (type: int), employeesalary (type: double), country (type: string) outputColumnNames: employeeid, employeesalary, country - Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 
Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 'hll') keys: employeesalary (type: double), country (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct) Reduce Operator Tree: Group By Operator @@ -272,21 +284,22 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: struct), _col0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: employeeID Column Types: int @@ -301,6 +314,13 @@ PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA PREHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary) compute statistics for columns employeeID POSTHOOK: type: QUERY @@ -311,6 +331,13 @@ POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA POSTHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA 
+POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='3000.0', country='UK') employeeID PREHOOK: type: DESCTABLE @@ -337,22 +364,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: employee_part - Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: employeeid (type: int), employeename (type: string), employeesalary (type: double), country (type: string) outputColumnNames: employeeid, employeename, employeesalary, country - Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(employeeid, 'hll'), compute_stats(employeename, 'hll') keys: employeesalary (type: double), country (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string) - Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 54 Data size: 412 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: Group By Operator @@ -360,21 +387,22 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 206 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: employeeid, employeename Column Types: int, string @@ -389,6 +417,13 @@ PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA PREHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK PREHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee_part +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +PREHOOK: Output: 
default@employee_part@employeesalary=3000.0/country=USA +PREHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee_Part partition (employeeSalary,country) compute statistics for columns POSTHOOK: type: QUERY @@ -399,6 +434,13 @@ POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=3000.0/country=USA POSTHOOK: Input: default@employee_part@employeesalary=3500.0/country=UK POSTHOOK: Input: default@employee_part@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee_part +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA +POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee_Part partition (employeeSalary='3500.0', country='UK') employeeName PREHOOK: type: DESCTABLE @@ -466,6 +508,11 @@ PREHOOK: Input: default@employee@employeesalary=2000.0/country=UK PREHOOK: Input: default@employee@employeesalary=2000.0/country=USA PREHOOK: Input: default@employee@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee@employeesalary=3500.0/country=UK +PREHOOK: Output: default@employee +PREHOOK: Output: default@employee@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=3500.0/country=UK #### A masked pattern was here #### POSTHOOK: query: analyze table Employee partition (employeeSalary,country) compute statistics for columns POSTHOOK: type: QUERY @@ -474,6 +521,11 @@ POSTHOOK: Input: default@employee@employeesalary=2000.0/country=UK POSTHOOK: Input: default@employee@employeesalary=2000.0/country=USA POSTHOOK: Input: default@employee@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=3500.0/country=UK #### A masked pattern was here #### PREHOOK: query: describe formatted Employee partition (employeeSalary='3500.0', country='UK') employeeName PREHOOK: type: DESCTABLE @@ -511,6 +563,13 @@ PREHOOK: Input: default@employee@employeesalary=3000.0/country=UK PREHOOK: Input: default@employee@employeesalary=3000.0/country=USA PREHOOK: Input: default@employee@employeesalary=3500.0/country=UK PREHOOK: Input: default@employee@employeesalary=4000.0/country=USA +PREHOOK: Output: default@employee +PREHOOK: Output: default@employee@employeesalary=2000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=2000.0/country=USA +PREHOOK: Output: default@employee@employeesalary=3000.0/country=UK +PREHOOK: Output: default@employee@employeesalary=3000.0/country=USA +PREHOOK: Output: default@employee@employeesalary=3500.0/country=UK +PREHOOK: Output: 
default@employee@employeesalary=4000.0/country=USA #### A masked pattern was here #### POSTHOOK: query: analyze table Employee partition (employeeSalary) compute statistics for columns POSTHOOK: type: QUERY @@ -521,6 +580,13 @@ POSTHOOK: Input: default@employee@employeesalary=3000.0/country=UK POSTHOOK: Input: default@employee@employeesalary=3000.0/country=USA POSTHOOK: Input: default@employee@employeesalary=3500.0/country=UK POSTHOOK: Input: default@employee@employeesalary=4000.0/country=USA +POSTHOOK: Output: default@employee +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=2000.0/country=USA +POSTHOOK: Output: default@employee@employeesalary=3000.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=3000.0/country=USA +POSTHOOK: Output: default@employee@employeesalary=3500.0/country=UK +POSTHOOK: Output: default@employee@employeesalary=4000.0/country=USA #### A masked pattern was here #### PREHOOK: query: describe formatted Employee partition (employeeSalary='3000.0', country='USA') employeeName PREHOOK: type: DESCTABLE @@ -552,11 +618,15 @@ PREHOOK: query: analyze table Employee partition (employeeSalary='6000.0',countr PREHOOK: type: QUERY PREHOOK: Input: default@employee PREHOOK: Input: default@employee@employeesalary=6000.0/country=UK +PREHOOK: Output: default@employee +PREHOOK: Output: default@employee@employeesalary=6000.0/country=UK #### A masked pattern was here #### POSTHOOK: query: analyze table Employee partition (employeeSalary='6000.0',country='UK') compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@employee POSTHOOK: Input: default@employee@employeesalary=6000.0/country=UK +POSTHOOK: Output: default@employee +POSTHOOK: Output: default@employee@employeesalary=6000.0/country=UK #### A masked pattern was here #### PREHOOK: query: describe formatted Employee partition (employeeSalary='6000.0', country='UK') employeeName PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/columnstats_quoting.q.out ql/src/test/results/clientpositive/columnstats_quoting.q.out index b17ce0e67a..683c1e274f 100644 --- ql/src/test/results/clientpositive/columnstats_quoting.q.out +++ ql/src/test/results/clientpositive/columnstats_quoting.q.out @@ -53,7 +53,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: user id, user name Column Types: bigint, string @@ -62,10 +63,12 @@ STAGE PLANS: PREHOOK: query: analyze table user_web_events compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events #### A masked pattern was here #### POSTHOOK: query: analyze table user_web_events compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events #### A masked pattern was here #### PREHOOK: query: explain analyze table user_web_events compute statistics for columns `user id` PREHOOK: type: QUERY @@ -110,7 +113,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: user id Column Types: bigint @@ -119,8 +123,10 @@ STAGE PLANS: PREHOOK: query: analyze table user_web_events compute statistics for columns `user id` PREHOOK: type: QUERY PREHOOK: Input: default@user_web_events +PREHOOK: Output: default@user_web_events 
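
(Note on the pattern repeated throughout the hunks above: `ANALYZE TABLE ... COMPUTE STATISTICS FOR COLUMNS` now lists the analyzed table — and, for partitioned tables, each partition — as a PREHOOK/POSTHOOK Output entity, and the plan's final stage prints as "Stats Work" with a "Basic Stats Work:" section instead of the old "Column Stats Work". The reason is that the column-analyze path now also writes basic stats, making the table a write entity. A minimal sketch of the command shape these golden files exercise; `stats_demo` and its columns are illustrative names, not from the patch:

    -- assumes a Hive session with hive.stats.column.autogather enabled
    CREATE TABLE stats_demo (id BIGINT, name STRING);
    -- gathers basic stats (numRows, rawDataSize) and column stats in one pass,
    -- which is why the table appears as an Output entity as well as an Input
    ANALYZE TABLE stats_demo COMPUTE STATISTICS FOR COLUMNS id, name;
    DESCRIBE FORMATTED stats_demo id;
)
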
#### A masked pattern was here #### POSTHOOK: query: analyze table user_web_events compute statistics for columns `user id` POSTHOOK: type: QUERY POSTHOOK: Input: default@user_web_events +POSTHOOK: Output: default@user_web_events #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/columnstats_tbllvl.q.out ql/src/test/results/clientpositive/columnstats_tbllvl.q.out index 25892d8fa6..1f62a64d1a 100644 --- ql/src/test/results/clientpositive/columnstats_tbllvl.q.out +++ ql/src/test/results/clientpositive/columnstats_tbllvl.q.out @@ -83,7 +83,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: sourceIP, avgTimeOnSite, adRevenue Column Types: string, int, float @@ -106,7 +107,8 @@ STAGE PLANS: TableScan alias: uservisits_web_text_none Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ + GatherStats: true Select Operator expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) outputColumnNames: sourceip, adrevenue, avgtimeonsite @@ -205,7 +207,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.uservisits_web_text_none/ Column Stats Desc: Columns: sourceIP, avgTimeOnSite, adRevenue Column Types: string, int, float @@ -215,10 +219,12 @@ STAGE PLANS: PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: explain analyze table default.UserVisits_web_text_none compute statistics for columns @@ -236,11 +242,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: uservisits_web_text_none - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sourceip (type: string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int) outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') mode: hash @@ -265,7 +271,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: sourceip, desturl, 
visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite Column Types: string, string, string, float, string, string, string, string, int @@ -274,10 +281,12 @@ STAGE PLANS: PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: describe formatted UserVisits_web_text_none destURL PREHOOK: type: DESCTABLE @@ -288,7 +297,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector destURL string 0 55 48.945454545454545 96 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -298,7 +307,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector adRevenue float 13.099044799804688 492.98870849609375 0 55 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -308,7 +317,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector avgTimeOnSite int 1 9 0 9 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: CREATE TABLE empty_tab( a int, b double, @@ -374,7 +383,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats 
Work + Basic Stats Work: Column Stats Desc: Columns: a, b, c, d, e Column Types: int, double, string, boolean, binary @@ -383,10 +393,12 @@ STAGE PLANS: PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY PREHOOK: Input: default@empty_tab +PREHOOK: Output: default@empty_tab #### A masked pattern was here #### POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY POSTHOOK: Input: default@empty_tab +POSTHOOK: Output: default@empty_tab #### A masked pattern was here #### PREHOOK: query: create database if not exists dummydb PREHOOK: type: CREATEDATABASE @@ -403,10 +415,13 @@ POSTHOOK: Input: database:dummydb PREHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### +Cannot get table uservisits_web_text_none POSTHOOK: query: analyze table default.UserVisits_web_text_none compute statistics for columns destURL POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: describe formatted default.UserVisits_web_text_none destURL PREHOOK: type: DESCTABLE @@ -417,7 +432,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector destURL string 0 55 48.945454545454545 96 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: CREATE TABLE UserVisits_in_dummy_db ( sourceIP string, destURL string, @@ -505,7 +520,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: sourceIP, avgTimeOnSite, adRevenue Column Types: string, int, float @@ -528,7 +544,8 @@ STAGE PLANS: TableScan alias: uservisits_in_dummy_db Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: dummydb.uservisits_in_dummy_db/ + GatherStats: true Select Operator expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) outputColumnNames: sourceip, adrevenue, avgtimeonsite @@ -627,7 +644,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: dummydb.uservisits_in_dummy_db/ Column Stats Desc: Columns: sourceIP, avgTimeOnSite, adRevenue Column Types: string, int, float @@ -637,10 +656,13 @@ STAGE PLANS: PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### +Cannot get table uservisits_in_dummy_db POSTHOOK: 
query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### PREHOOK: query: explain analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns @@ -658,11 +680,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: uservisits_in_dummy_db - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sourceip (type: string), desturl (type: string), visitdate (type: string), adrevenue (type: float), useragent (type: string), ccode (type: string), lcode (type: string), skeyword (type: string), avgtimeonsite (type: int) outputColumnNames: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite - Statistics: Num rows: 9 Data size: 7060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 7005 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(sourceip, 'hll'), compute_stats(desturl, 'hll'), compute_stats(visitdate, 'hll'), compute_stats(adrevenue, 'hll'), compute_stats(useragent, 'hll'), compute_stats(ccode, 'hll'), compute_stats(lcode, 'hll'), compute_stats(skeyword, 'hll'), compute_stats(avgtimeonsite, 'hll') mode: hash @@ -687,7 +709,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: sourceip, desturl, visitdate, adrevenue, useragent, ccode, lcode, skeyword, avgtimeonsite Column Types: string, string, string, float, string, string, string, string, int @@ -696,10 +719,13 @@ STAGE PLANS: PREHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: dummydb@uservisits_in_dummy_db +PREHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### +Cannot get table uservisits_in_dummy_db POSTHOOK: query: analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: dummydb@uservisits_in_dummy_db +POSTHOOK: Output: dummydb@uservisits_in_dummy_db #### A masked pattern was here #### PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db destURL PREHOOK: type: DESCTABLE @@ -710,7 +736,7 @@ POSTHOOK: Input: dummydb@uservisits_in_dummy_db # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector destURL string 0 55 48.945454545454545 96 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: dummydb@uservisits_in_dummy_db @@ -720,7 +746,7 @@ POSTHOOK: Input: dummydb@uservisits_in_dummy_db # col_name data_type min max num_nulls distinct_count 
avg_col_len max_col_len num_trues num_falses comment bitVector adRevenue float 13.099044799804688 492.98870849609375 0 55 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: describe formatted dummydb.UserVisits_in_dummy_db avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: dummydb@uservisits_in_dummy_db @@ -730,7 +756,7 @@ POSTHOOK: Input: dummydb@uservisits_in_dummy_db # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector avgTimeOnSite int 1 9 0 9 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} PREHOOK: query: drop table dummydb.UserVisits_in_dummy_db PREHOOK: type: DROPTABLE PREHOOK: Input: dummydb@uservisits_in_dummy_db diff --git ql/src/test/results/clientpositive/compustat_avro.q.out ql/src/test/results/clientpositive/compustat_avro.q.out index db20bef04e..aa7473253d 100644 --- ql/src/test/results/clientpositive/compustat_avro.q.out +++ ql/src/test/results/clientpositive/compustat_avro.q.out @@ -37,10 +37,12 @@ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"tru PREHOOK: query: analyze table testAvro compute statistics for columns col1,col3 PREHOOK: type: QUERY PREHOOK: Input: default@testavro +PREHOOK: Output: default@testavro #### A masked pattern was here #### POSTHOOK: query: analyze table testAvro compute statistics for columns col1,col3 POSTHOOK: type: QUERY POSTHOOK: Input: default@testavro +POSTHOOK: Output: default@testavro #### A masked pattern was here #### PREHOOK: query: describe formatted testAvro col1 PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/compute_stats_date.q.out ql/src/test/results/clientpositive/compute_stats_date.q.out index e738c25fc9..dd26cfd3d2 100644 --- ql/src/test/results/clientpositive/compute_stats_date.q.out +++ ql/src/test/results/clientpositive/compute_stats_date.q.out @@ -89,7 +89,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: fl_date Column Types: date @@ -98,10 +99,12 @@ STAGE PLANS: PREHOOK: query: analyze table tab_date compute statistics for columns fl_date PREHOOK: type: QUERY PREHOOK: Input: default@tab_date +PREHOOK: Output: default@tab_date #### A masked pattern was here #### POSTHOOK: query: analyze table tab_date compute statistics for columns fl_date POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_date +POSTHOOK: Output: default@tab_date #### A masked pattern was here #### PREHOOK: query: describe formatted tab_date fl_date 
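
(Note on the COLUMN_STATS_ACCURATE changes in these describe-formatted diffs: the table property is a JSON blob, and the behavioral change is that analyzing columns now also marks basic stats accurate, so entries go from {"COLUMN_STATS":{...}} to {"BASIC_STATS":"true","COLUMN_STATS":{...}}. The next hunk also shows that column stats can be overridden by hand; a sketch of that statement, using the `tab_date` table and the exact property keys the golden file exercises — the values here are the test's, purely illustrative:

    -- numDVs / highValue / lowValue are the stat keys updated in compute_stats_date.q
    ALTER TABLE tab_date UPDATE STATISTICS FOR COLUMN fl_date
      SET ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0');
)
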
PREHOOK: type: DESCTABLE @@ -112,7 +115,7 @@ POSTHOOK: Input: default@tab_date # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector fl_date date 2000-11-20 2010-10-29 0 19 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"fl_date\":\"true\"}} PREHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0') PREHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: query: alter table tab_date update statistics for column fl_date set ('numDVs'='19', 'highValue'='2015-01-01', 'lowValue'='0') @@ -126,4 +129,4 @@ POSTHOOK: Input: default@tab_date # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector fl_date date 1970-01-01 2015-01-01 0 19 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"fl_date\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"fl_date\":\"true\"}} diff --git ql/src/test/results/clientpositive/constGby.q.out ql/src/test/results/clientpositive/constGby.q.out index 7115be3611..c633624935 100644 --- ql/src/test/results/clientpositive/constGby.q.out +++ ql/src/test/results/clientpositive/constGby.q.out @@ -17,10 +17,12 @@ POSTHOOK: Output: default@t1 PREHOOK: query: analyze table t1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 #### A masked pattern was here #### POSTHOOK: query: analyze table t1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 #### A masked pattern was here #### PREHOOK: query: explain select count(1) from t1 group by 1 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/constant_prop_2.q.out ql/src/test/results/clientpositive/constant_prop_2.q.out index 77a91222fb..7c452451bd 100644 --- ql/src/test/results/clientpositive/constant_prop_2.q.out +++ ql/src/test/results/clientpositive/constant_prop_2.q.out @@ -74,7 +74,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key, value Column Types: string, string diff --git ql/src/test/results/clientpositive/constant_prop_3.q.out ql/src/test/results/clientpositive/constant_prop_3.q.out index 8119ccf028..88f25ff717 100644 --- ql/src/test/results/clientpositive/constant_prop_3.q.out +++ ql/src/test/results/clientpositive/constant_prop_3.q.out @@ -51,10 +51,12 @@ POSTHOOK: Output: default@part_hive PREHOOK: query: analyze table part_hive compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@part_hive +PREHOOK: Output: default@part_hive #### A masked pattern was here #### POSTHOOK: query: analyze table part_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@part_hive +POSTHOOK: Output: default@part_hive #### A masked pattern was here #### PREHOOK: query: analyze table partsupp_hive compute statistics PREHOOK: type: QUERY @@ -67,10 +69,12 @@ POSTHOOK: Output: default@partsupp_hive PREHOOK: query: analyze table partsupp_hive compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partsupp_hive +PREHOOK: Output: default@partsupp_hive #### A masked pattern was here #### POSTHOOK: query: analyze table partsupp_hive compute statistics for columns POSTHOOK: type: 
QUERY POSTHOOK: Input: default@partsupp_hive +POSTHOOK: Output: default@partsupp_hive #### A masked pattern was here #### PREHOOK: query: analyze table supplier_hive compute statistics PREHOOK: type: QUERY @@ -83,10 +87,12 @@ POSTHOOK: Output: default@supplier_hive PREHOOK: query: analyze table supplier_hive compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@supplier_hive +PREHOOK: Output: default@supplier_hive #### A masked pattern was here #### POSTHOOK: query: analyze table supplier_hive compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@supplier_hive +POSTHOOK: Output: default@supplier_hive #### A masked pattern was here #### Warning: Shuffle Join JOIN[25][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select diff --git ql/src/test/results/clientpositive/constprog_dp.q.out ql/src/test/results/clientpositive/constprog_dp.q.out index 8cf301d6f7..4ed3ebf666 100644 --- ql/src/test/results/clientpositive/constprog_dp.q.out +++ ql/src/test/results/clientpositive/constprog_dp.q.out @@ -43,6 +43,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,7 +100,12 @@ STAGE PLANS: name: default.dest Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/constprog_type.q.out ql/src/test/results/clientpositive/constprog_type.q.out index d145d37a3b..27ef1f482b 100644 --- ql/src/test/results/clientpositive/constprog_type.q.out +++ ql/src/test/results/clientpositive/constprog_type.q.out @@ -67,7 +67,8 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr 
Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/correlated_join_keys.q.out ql/src/test/results/clientpositive/correlated_join_keys.q.out index 5464cd3e1a..90f5860027 100644 --- ql/src/test/results/clientpositive/correlated_join_keys.q.out +++ ql/src/test/results/clientpositive/correlated_join_keys.q.out @@ -61,10 +61,12 @@ POSTHOOK: Output: default@customer_address PREHOOK: query: analyze table customer_address compute statistics for columns ca_state, ca_zip PREHOOK: type: QUERY PREHOOK: Input: default@customer_address +PREHOOK: Output: default@customer_address #### A masked pattern was here #### POSTHOOK: query: analyze table customer_address compute statistics for columns ca_state, ca_zip POSTHOOK: type: QUERY POSTHOOK: Input: default@customer_address +POSTHOOK: Output: default@customer_address #### A masked pattern was here #### PREHOOK: query: explain select count(*) from customer_address a join customer_address b on (a.ca_zip = b.ca_zip and a.ca_state = b.ca_state) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/correlationoptimizer5.q.out ql/src/test/results/clientpositive/correlationoptimizer5.q.out index 00bdb4caa1..b7c3f5743a 100644 --- ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -106,10 +106,11 @@ ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 + Stage-2 depends on stages: Stage-1, Stage-6 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 - Stage-5 is a root stage + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 + Stage-6 is a root stage STAGE PLANS: Stage: Stage-1 @@ -198,6 +199,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -210,9 +226,36 @@ STAGE PLANS: name: default.dest_co1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -314,8 +357,11 @@ ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 + Stage-6 is a root stage STAGE PLANS: Stage: Stage-1 @@ -351,6 +397,118 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: m Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE @@ -383,85 +541,24 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 2956 Data size: 12099 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - 
table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - - Stage: Stage-2 - Stats-Aggr Operator + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_co2 SELECT b.key, d.val @@ -510,21 +607,22 @@ JOIN ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-14 is a root stage - Stage-10 depends on stages: Stage-14 - Stage-9 depends on stages: Stage-10, Stage-11 , consists of Stage-12, Stage-13, Stage-2 - Stage-12 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-12 - Stage-0 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-3 depends on stages: Stage-0 + Stage-15 is a root stage + Stage-11 depends on stages: Stage-15 + Stage-10 depends on stages: Stage-11, Stage-12 , consists of Stage-13, Stage-14, Stage-2 Stage-13 has a backup stage: Stage-2 Stage-8 depends on stages: Stage-13 + Stage-0 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-14 has a backup stage: Stage-2 + Stage-9 depends on stages: Stage-14 Stage-2 - Stage-15 is a root stage - Stage-11 depends on stages: Stage-15 + Stage-16 is a root stage + Stage-12 depends on stages: Stage-16 STAGE PLANS: - Stage: Stage-14 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_1:y @@ -547,7 +645,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -577,10 +675,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-9 + Stage: Stage-10 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -594,7 +692,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -618,6 +716,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -632,9 +745,36 @@ STAGE PLANS: name: default.dest_co3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co3 - Stage: Stage-13 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -648,7 +788,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-8 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -672,6 +812,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -712,8 +867,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:$hdt$_1:m @@ -736,7 +906,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan diff 
--git ql/src/test/results/clientpositive/cp_sel.q.out ql/src/test/results/clientpositive/cp_sel.q.out index af2efeb0cf..d467b18140 100644 --- ql/src/test/results/clientpositive/cp_sel.q.out +++ ql/src/test/results/clientpositive/cp_sel.q.out @@ -81,7 +81,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -113,6 +114,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testpartbucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -128,7 +145,41 @@ STAGE PLANS: name: default.testpartbucket Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.testpartbucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/ctas.q.out ql/src/test/results/clientpositive/ctas.q.out index c1b0838060..46e7b8f102 100644 --- ql/src/test/results/clientpositive/ctas.q.out +++ ql/src/test/results/clientpositive/ctas.q.out @@ -98,7 +98,8 @@ STAGE PLANS: name: default.nzhang_CTAS1 Stage: Stage-3 - 
Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT @@ -249,7 +250,8 @@ STAGE PLANS: name: default.nzhang_ctas2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT @@ -400,7 +402,8 @@ STAGE PLANS: name: default.nzhang_ctas3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 PREHOOK: type: CREATETABLE_AS_SELECT @@ -616,7 +619,8 @@ STAGE PLANS: name: default.nzhang_ctas4 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT @@ -771,7 +775,8 @@ STAGE PLANS: name: default.nzhang_ctas5 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT diff --git ql/src/test/results/clientpositive/ctas_colname.q.out ql/src/test/results/clientpositive/ctas_colname.q.out index 8d61c9dfdd..6399932d7d 100644 --- ql/src/test/results/clientpositive/ctas_colname.q.out +++ ql/src/test/results/clientpositive/ctas_colname.q.out @@ -62,7 +62,8 @@ STAGE PLANS: name: default.summary Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table summary as select *, key + 1, concat(value, value) from src limit 20 PREHOOK: type: CREATETABLE_AS_SELECT @@ -222,7 +223,8 @@ STAGE PLANS: name: default.x4 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table x4 as select *, rank() over(partition by key order by value) as rr from src1 PREHOOK: type: CREATETABLE_AS_SELECT @@ -413,7 +415,8 @@ STAGE PLANS: name: default.x5 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table x5 as select *, lead(key,1) over(partition by key order by value) as lead1 from src limit 20 PREHOOK: type: CREATETABLE_AS_SELECT @@ -554,7 +557,8 @@ STAGE PLANS: name: default.x6 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce @@ -735,7 +739,8 @@ STAGE PLANS: name: default.x7 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table x7 as select * from (select *, count(value) from src group by key, value) a PREHOOK: type: CREATETABLE_AS_SELECT @@ -1169,7 +1174,8 @@ STAGE PLANS: name: default.x8 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table x8 as select * from (select *, count(value) from src group by key, value having key < 9) a PREHOOK: type: CREATETABLE_AS_SELECT @@ -1306,7 +1312,8 @@ STAGE PLANS: name: default.x9 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table x9 as select * from (select max(value),key from src group by key having key < 9 AND max(value) IS NOT NULL) a 
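
(Note on the CTAS golden files above: the old "Stats-Aggr Operator" stage is renamed to "Stats Work" containing only "Basic Stats Work:"; unlike the INSERT and ANALYZE plans earlier in this patch, these CTAS hunks carry no "Column Stats Desc:" block. A minimal sketch of the statement shape being planned, following the nzhang_ctas-style tests and assuming a populated src(key STRING, value STRING) table as in the q-tests:

    CREATE TABLE ctas_demo AS
    SELECT key, value FROM src SORT BY key, value LIMIT 10;
    -- EXPLAIN on the statement above is what prints the final 'Stats Work' stage
)
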
PREHOOK: type: CREATETABLE_AS_SELECT diff --git ql/src/test/results/clientpositive/ctas_uses_database_location.q.out ql/src/test/results/clientpositive/ctas_uses_database_location.q.out index ed6d2bce3d..990a8210e9 100644 --- ql/src/test/results/clientpositive/ctas_uses_database_location.q.out +++ ql/src/test/results/clientpositive/ctas_uses_database_location.q.out @@ -74,7 +74,8 @@ STAGE PLANS: name: db1.table_db1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/decimal_stats.q.out ql/src/test/results/clientpositive/decimal_stats.q.out index eacad98e72..5350036091 100644 --- ql/src/test/results/clientpositive/decimal_stats.q.out +++ ql/src/test/results/clientpositive/decimal_stats.q.out @@ -35,10 +35,12 @@ POSTHOOK: Lineage: decimal_1.v EXPRESSION [] PREHOOK: query: analyze table decimal_1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@decimal_1 +PREHOOK: Output: default@decimal_1 #### A masked pattern was here #### POSTHOOK: query: analyze table decimal_1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_1 +POSTHOOK: Output: default@decimal_1 #### A masked pattern was here #### PREHOOK: query: desc formatted decimal_1 v PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/deleteAnalyze.q.out ql/src/test/results/clientpositive/deleteAnalyze.q.out index d3609f8b6f..2f4af550f3 100644 --- ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -48,7 +48,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} numFiles 1 numRows 2 rawDataSize 634 @@ -74,23 +74,27 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -amount decimal(10,3) from deserializer -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} +amount decimal(10,3) 12.123 123.123 0 2 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: default@testdeci2 #### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 +PREHOOK: Output: default@testdeci2 #### A masked pattern was here #### POSTHOOK: query: analyze table testdeci2 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@testdeci2 +POSTHOOK: Output: default@testdeci2 #### A masked pattern was here #### PREHOOK: query: explain select s.id, diff --git ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out index 8cbed9accf..33302cb118 100644 --- ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out +++ 
ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out @@ -99,7 +99,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: sourceIP, avgTimeOnSite, adRevenue Column Types: string, int, float @@ -122,7 +123,8 @@ STAGE PLANS: TableScan alias: uservisits_web_text_none Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ + GatherStats: true Select Operator expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) outputColumnNames: sourceip, adrevenue, avgtimeonsite @@ -221,7 +223,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.uservisits_web_text_none/ Column Stats Desc: Columns: sourceIP, avgTimeOnSite, adRevenue Column Types: string, int, float @@ -231,10 +235,12 @@ STAGE PLANS: PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE @@ -245,7 +251,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector sourceIP string 0 55 12.763636363636364 13 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -255,7 +261,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector avgTimeOnSite int 1 9 0 9 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -265,7 +271,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector adRevenue float 13.099044799804688 492.98870849609375 0 55 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: CREATE TABLE 
empty_tab( a int, b double, @@ -341,7 +347,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: a, b, c, d, e Column Types: int, double, string, boolean, binary @@ -350,10 +357,12 @@ STAGE PLANS: PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY PREHOOK: Input: default@empty_tab +PREHOOK: Output: default@empty_tab #### A masked pattern was here #### POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY POSTHOOK: Input: default@empty_tab +POSTHOOK: Output: default@empty_tab #### A masked pattern was here #### PREHOOK: query: desc formatted empty_tab a PREHOOK: type: DESCTABLE @@ -444,7 +453,7 @@ POSTHOOK: query: desc extended default.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none sourceIP string from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -472,15 +481,17 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector sourceIP string 0 55 12.763636363636364 13 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"sourceip\":\"true\"}} PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword PREHOOK: type: QUERY PREHOOK: Input: test@uservisits_web_text_none #### A masked pattern was here #### +PREHOOK: Output: test@uservisits_web_text_none POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword POSTHOOK: type: QUERY POSTHOOK: Input: test@uservisits_web_text_none #### A masked pattern was here #### +POSTHOOK: Output: test@uservisits_web_text_none PREHOOK: query: desc extended UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -488,7 +499,7 @@ POSTHOOK: query: desc extended UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none sKeyword string from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"skeyword\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -498,7 +509,7 @@ POSTHOOK: Input: test@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector sKeyword string 0 54 7.872727272727273 19 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"skeyword\":\"true\"}} PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: 
test@uservisits_web_text_none @@ -508,4 +519,4 @@ POSTHOOK: Input: test@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector sKeyword string 0 54 7.872727272727273 19 HL from deserializer -COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"skeyword\":\"true\"}} +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"skeyword\":\"true\"}} diff --git ql/src/test/results/clientpositive/distinct_stats.q.out ql/src/test/results/clientpositive/distinct_stats.q.out index 73b4addf57..bc2ab02ef5 100644 --- ql/src/test/results/clientpositive/distinct_stats.q.out +++ ql/src/test/results/clientpositive/distinct_stats.q.out @@ -19,10 +19,12 @@ POSTHOOK: Lineage: t1.b SIMPLE [(src)src.FieldSchema(name:value, type:string, co PREHOOK: query: analyze table t1 compute statistics for columns a,b PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 #### A masked pattern was here #### POSTHOOK: query: analyze table t1 compute statistics for columns a,b POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 #### A masked pattern was here #### PREHOOK: query: explain select count(distinct b) from t1 group by a diff --git ql/src/test/results/clientpositive/drop_table_with_stats.q.out ql/src/test/results/clientpositive/drop_table_with_stats.q.out index 52aa10a070..119f74a87e 100644 --- ql/src/test/results/clientpositive/drop_table_with_stats.q.out +++ ql/src/test/results/clientpositive/drop_table_with_stats.q.out @@ -30,10 +30,12 @@ PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb1@testtable #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable POSTHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb1@testtable #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:tblstatsdb1 @@ -54,10 +56,12 @@ PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable1 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable1 PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:tblstatsdb1 @@ -78,10 +82,12 @@ PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb1@testtable2 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb1@testtable2 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb1@testtable2 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb1@testtable2 PREHOOK: query: DROP TABLE tblstatsdb1.testtable PREHOOK: type: DROPTABLE PREHOOK: Input: tblstatsdb1@testtable @@ -146,10 +152,12 @@ PREHOOK: query: ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb2@testtable #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable POSTHOOK: query: 
ANALYZE TABLE testtable COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb2@testtable #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable PREHOOK: query: CREATE TABLE IF NOT EXISTS TestTable1 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: TBLSTATSDB2@TestTable1 @@ -170,10 +178,12 @@ PREHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb2@testtable1 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable1 POSTHOOK: query: ANALYZE TABLE TestTable1 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb2@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable1 PREHOOK: query: CREATE TABLE IF NOT EXISTS TESTTABLE2 (key STRING, value STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: TBLSTATSDB2@TESTTABLE2 @@ -194,10 +204,12 @@ PREHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key PREHOOK: type: QUERY PREHOOK: Input: tblstatsdb2@testtable2 #### A masked pattern was here #### +PREHOOK: Output: tblstatsdb2@testtable2 POSTHOOK: query: ANALYZE TABLE TESTTABLE2 COMPUTE STATISTICS FOR COLUMNS key POSTHOOK: type: QUERY POSTHOOK: Input: tblstatsdb2@testtable2 #### A masked pattern was here #### +POSTHOOK: Output: tblstatsdb2@testtable2 PREHOOK: query: DROP TABLE TBLSTATSDB2.testtable PREHOOK: type: DROPTABLE PREHOOK: Input: tblstatsdb2@testtable diff --git ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index d199574b29..b4efaf4511 100644 --- ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -61,7 +61,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -130,7 +130,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -199,7 +199,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -245,7 +245,7 @@ STAGE PLANS: partcol1 1 partcol2 __HIVE_DEFAULT_PARTITION__ properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol diff --git ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out index 76d0b7b02f..884e63c44d 100644 --- ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out @@ -69,5 +69,10 @@ STAGE PLANS: name: default.non_acid Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.non_acid diff --git ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out 
ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out index e8645883b7..abf8deeb17 100644 --- ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out +++ ql/src/test/results/clientpositive/encrypted/encryption_insert_values.q.out @@ -49,6 +49,12 @@ Database: default Retention: 0 #### A PARTIAL masked pattern was here #### data/warehouse/default/encrypted_table/.hive-staging Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"tmp_values_col1\":\"true\",\"tmp_values_col2\":\"true\"}} + numFiles 0 + numRows 0 + rawDataSize 0 + totalSize 0 # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out index 5d894abc79..18b2eb0ba0 100644 --- ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out +++ ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -616,7 +616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out index 5ecf20d18f..c00837758b 100644 --- ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out +++ ql/src/test/results/clientpositive/encrypted/encryption_move_tbl.q.out @@ -48,10 +48,14 @@ src PREHOOK: query: ANALYZE TABLE encrypted_table COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@encrypted_table +PREHOOK: Output: default@encrypted_table +PREHOOK: Output: default@encrypted_table #### A PARTIAL masked pattern was here #### data/warehouse/encrypted_table/.hive-staging POSTHOOK: query: ANALYZE TABLE encrypted_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY +POSTHOOK: Output: default@encrypted_table POSTHOOK: Input: default@encrypted_table +POSTHOOK: Output: default@encrypted_table #### A PARTIAL masked pattern was here #### data/warehouse/encrypted_table/.hive-staging PREHOOK: query: DESCRIBE FORMATTED encrypted_table key PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out index 5fd06afe90..90d42fb468 100644 --- ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out +++ ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out @@ -1,6 +1,18 @@ -PREHOOK: query: explain analyze table src compute statistics for columns +PREHOOK: query: create table t as select * from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: 
create table t as select * from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: t.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain analyze table t compute statistics for columns PREHOOK: type: QUERY -POSTHOOK: query: explain analyze table src compute statistics for columns +POSTHOOK: query: explain analyze table t compute statistics for columns POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage @@ -11,7 +23,7 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: src + alias: t Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -41,17 +53,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key, value Column Types: string, string - Table: default.src + Table: default.t -PREHOOK: query: analyze table src compute statistics for columns +PREHOOK: query: analyze table t compute statistics for columns PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: default@t +PREHOOK: Output: default@t #### A masked pattern was here #### -POSTHOOK: query: analyze table src compute statistics for columns +POSTHOOK: query: analyze table t compute statistics for columns POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/explain_ddl.q.out ql/src/test/results/clientpositive/explain_ddl.q.out index e108e2207c..d985d52e68 100644 --- ql/src/test/results/clientpositive/explain_ddl.q.out +++ ql/src/test/results/clientpositive/explain_ddl.q.out @@ -102,7 +102,8 @@ STAGE PLANS: name: default.M1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce @@ -194,7 +195,8 @@ STAGE PLANS: name: default.M1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce @@ -288,7 +290,8 @@ STAGE PLANS: name: default.M1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce @@ -380,7 +383,8 @@ STAGE PLANS: name: default.V1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce @@ -519,6 +523,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.m1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -540,7 +570,12 @@ STAGE PLANS: name: default.m1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.m1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out index b12d3a1f20..5f427375da 100644 --- ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out +++ ql/src/test/results/clientpositive/extrapolate_part_stats_date.q.out @@ -33,6 +33,10 @@ PREHOOK: Input: default@date_dim PREHOOK: Input: default@date_dim@d_date_sk=2416945 PREHOOK: Input: default@date_dim@d_date_sk=2416946 PREHOOK: Input: default@date_dim@d_date_sk=2416947 +PREHOOK: Output: default@date_dim +PREHOOK: Output: default@date_dim@d_date_sk=2416945 +PREHOOK: Output: default@date_dim@d_date_sk=2416946 +PREHOOK: Output: default@date_dim@d_date_sk=2416947 #### A masked pattern was here #### POSTHOOK: query: analyze table date_dim partition(d_date_sk) compute statistics for columns POSTHOOK: type: QUERY @@ -40,6 +44,10 @@ POSTHOOK: Input: default@date_dim POSTHOOK: Input: default@date_dim@d_date_sk=2416945 POSTHOOK: Input: default@date_dim@d_date_sk=2416946 POSTHOOK: Input: default@date_dim@d_date_sk=2416947 +POSTHOOK: Output: default@date_dim +POSTHOOK: Output: default@date_dim@d_date_sk=2416945 +POSTHOOK: Output: default@date_dim@d_date_sk=2416946 +POSTHOOK: Output: default@date_dim@d_date_sk=2416947 #### A masked pattern was here #### PREHOOK: query: explain select count(*) from date_dim where d_date > date "1900-01-02" and d_date_sk= 2416945 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index 219e1285d2..f2d6bd9508 100644 --- ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -59,21 +59,29 @@ PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statisti PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_1d PREHOOK: Input: default@loc_orc_1d@year=2000 +PREHOOK: Output: default@loc_orc_1d +PREHOOK: Output: default@loc_orc_1d@year=2000 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_1d POSTHOOK: Input: default@loc_orc_1d@year=2000 +POSTHOOK: Output: default@loc_orc_1d +POSTHOOK: Output: default@loc_orc_1d@year=2000 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_1d PREHOOK: Input: default@loc_orc_1d@year=2001 +PREHOOK: Output: default@loc_orc_1d +PREHOOK: Output: default@loc_orc_1d@year=2001 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for 
columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_1d POSTHOOK: Input: default@loc_orc_1d@year=2001 +POSTHOOK: Output: default@loc_orc_1d +POSTHOOK: Output: default@loc_orc_1d@year=2001 #### A masked pattern was here #### PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state PREHOOK: type: DESCTABLE @@ -102,7 +110,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -147,7 +155,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -215,7 +223,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -260,7 +268,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -347,41 +355,57 @@ PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2000') compu PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2000 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94086/year=2000 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2000') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2000 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2000 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2000') compute statistics for columns state,locid PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2000 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94087/year=2000 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2000') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2000 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2000 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94086/year=2001 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, 
year='2001') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2001 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2001') compute statistics for columns state,locid PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2001 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94087/year=2001 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2001') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2001 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2001 #### A masked pattern was here #### PREHOOK: query: explain extended select state from loc_orc_2d PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index b8bb210ae5..cfe6acc3f0 100644 --- ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -67,21 +67,29 @@ PREHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statisti PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_1d PREHOOK: Input: default@loc_orc_1d@year=2001 +PREHOOK: Output: default@loc_orc_1d +PREHOOK: Output: default@loc_orc_1d@year=2001 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_1d partition(year='2001') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_1d POSTHOOK: Input: default@loc_orc_1d@year=2001 +POSTHOOK: Output: default@loc_orc_1d +POSTHOOK: Output: default@loc_orc_1d@year=2001 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_1d PREHOOK: Input: default@loc_orc_1d@year=2002 +PREHOOK: Output: default@loc_orc_1d +PREHOOK: Output: default@loc_orc_1d@year=2002 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_1d partition(year='2002') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_1d POSTHOOK: Input: default@loc_orc_1d@year=2002 +POSTHOOK: Output: default@loc_orc_1d +POSTHOOK: Output: default@loc_orc_1d@year=2002 #### A masked pattern was here #### PREHOOK: query: describe formatted loc_orc_1d PARTITION(year='2001') state PREHOOK: type: DESCTABLE @@ -119,7 +127,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -164,7 +172,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -209,7 +217,7 @@ STAGE PLANS: partition values: year 
2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -254,7 +262,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -296,12 +304,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1700 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d @@ -322,7 +330,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -367,7 +375,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -412,7 +420,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -457,7 +465,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -499,33 +507,41 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_1d PREHOOK: Input: default@loc_orc_1d@year=2000 +PREHOOK: Output: default@loc_orc_1d +PREHOOK: Output: default@loc_orc_1d@year=2000 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_1d POSTHOOK: Input: default@loc_orc_1d@year=2000 +POSTHOOK: Output: default@loc_orc_1d 
+POSTHOOK: Output: default@loc_orc_1d@year=2000 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_1d partition(year='2003') compute statistics for columns state PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_1d PREHOOK: Input: default@loc_orc_1d@year=2003 +PREHOOK: Output: default@loc_orc_1d +PREHOOK: Output: default@loc_orc_1d@year=2003 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_1d partition(year='2003') compute statistics for columns state POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_1d POSTHOOK: Input: default@loc_orc_1d@year=2003 +POSTHOOK: Output: default@loc_orc_1d +POSTHOOK: Output: default@loc_orc_1d@year=2003 #### A masked pattern was here #### PREHOOK: query: explain extended select state from loc_orc_1d PREHOOK: type: QUERY @@ -545,7 +561,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -590,7 +606,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -635,7 +651,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -680,7 +696,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -748,7 +764,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -793,7 +809,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -838,7 +854,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -883,7 +899,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -925,12 +941,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: 
Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: create table if not exists loc_orc_2d ( @@ -991,21 +1007,29 @@ PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compu PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94086/year=2001 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2001 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2002 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94087/year=2002 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2002') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2002 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2002 #### A masked pattern was here #### PREHOOK: query: explain extended select state from loc_orc_2d PREHOOK: type: QUERY @@ -1026,7 +1050,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1072,7 +1096,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1118,7 +1142,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1164,7 +1188,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1256,7 +1280,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1302,7 +1326,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1348,7 +1372,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1394,7 +1418,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1486,7 +1510,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1528,12 +1552,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_2d @@ -1555,7 +1579,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1601,7 +1625,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1647,7 +1671,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1693,7 +1717,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1785,7 +1809,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1831,7 +1855,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1877,7 +1901,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1923,7 +1947,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2015,7 +2039,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2057,11 +2081,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git ql/src/test/results/clientpositive/filter_join_breaktask.q.out ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 8f9b6363f4..884bfcdacc 100644 --- ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -89,7 +89,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -129,7 +129,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f, $hdt$_1:m] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:f, $hdt$_2:m] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -226,7 +226,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -266,7 +266,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/fm-sketch.q.out ql/src/test/results/clientpositive/fm-sketch.q.out index 7a4d395fb4..1aa81e43ca 100644 --- ql/src/test/results/clientpositive/fm-sketch.q.out +++ ql/src/test/results/clientpositive/fm-sketch.q.out @@ -58,7 +58,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key Column Types: int @@ -67,10 +68,12 @@ STAGE PLANS: PREHOOK: query: analyze table n compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@n +PREHOOK: Output: default@n #### A masked pattern was here #### POSTHOOK: query: analyze table n compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@n +POSTHOOK: Output: default@n #### A masked pattern was here #### PREHOOK: query: desc formatted n key PREHOOK: type: DESCTABLE @@ -142,7 +145,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key Column Types: int 
@@ -151,10 +155,12 @@ STAGE PLANS: PREHOOK: query: analyze table i compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@i +PREHOOK: Output: default@i #### A masked pattern was here #### POSTHOOK: query: analyze table i compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@i +POSTHOOK: Output: default@i #### A masked pattern was here #### PREHOOK: query: desc formatted i key PREHOOK: type: DESCTABLE @@ -194,10 +200,12 @@ POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, PREHOOK: query: analyze table i compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@i +PREHOOK: Output: default@i #### A masked pattern was here #### POSTHOOK: query: analyze table i compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@i +POSTHOOK: Output: default@i #### A masked pattern was here #### PREHOOK: query: desc formatted i key PREHOOK: type: DESCTABLE @@ -237,10 +245,12 @@ POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, PREHOOK: query: analyze table i compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@i +PREHOOK: Output: default@i #### A masked pattern was here #### POSTHOOK: query: analyze table i compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@i +POSTHOOK: Output: default@i #### A masked pattern was here #### PREHOOK: query: desc formatted i key PREHOOK: type: DESCTABLE @@ -306,10 +316,12 @@ POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__5)values__tmp__table__ PREHOOK: query: analyze table i compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@i +PREHOOK: Output: default@i #### A masked pattern was here #### POSTHOOK: query: analyze table i compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@i +POSTHOOK: Output: default@i #### A masked pattern was here #### PREHOOK: query: desc formatted i key PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/groupby1.q.out ql/src/test/results/clientpositive/groupby1.q.out index 46e09dd256..d05feec784 100644 --- ql/src/test/results/clientpositive/groupby1.q.out +++ ql/src/test/results/clientpositive/groupby1.q.out @@ -90,7 +90,8 @@ STAGE PLANS: name: default.dest_g1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby10.q.out ql/src/test/results/clientpositive/groupby10.q.out index 66832b02fc..b3acf63338 100644 --- ql/src/test/results/clientpositive/groupby10.q.out +++ ql/src/test/results/clientpositive/groupby10.q.out @@ -44,11 +44,15 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -119,6 +123,16 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -131,13 +145,70 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: int) @@ -156,7 +227,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -185,6 +256,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num 
rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -196,8 +277,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -289,11 +411,15 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -364,6 +490,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -376,13 +512,70 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: int) @@ -401,7 +594,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -430,6 +623,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -441,8 +644,49 @@ STAGE PLANS: 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -533,9 +777,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -574,6 +820,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: int) @@ -592,6 +848,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + 
outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -604,7 +870,42 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 Stage: Stage-1 Move Operator @@ -616,8 +917,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key diff --git ql/src/test/results/clientpositive/groupby11.q.out ql/src/test/results/clientpositive/groupby11.q.out index 1d0e86ab7d..592be1df85 100644 --- ql/src/test/results/clientpositive/groupby11.q.out +++ ql/src/test/results/clientpositive/groupby11.q.out @@ -32,11 +32,15 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-0, Stage-6, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: 
Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-1, Stage-6, Stage-11 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -107,6 +111,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -121,13 +135,79 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, 
val1, val2 + Column Types: string, int, int + Table: default.dest2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: substr(value, 5) (type: string), key (type: string) sort order: ++ Map-reduce partition columns: substr(value, 5) (type: string) @@ -146,7 +226,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -175,6 +255,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -188,8 +278,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 partition(ds='111') diff --git ql/src/test/results/clientpositive/groupby12.q.out ql/src/test/results/clientpositive/groupby12.q.out index 921fc92b3c..cd0b7fbfb8 100644 --- ql/src/test/results/clientpositive/groupby12.q.out +++ ql/src/test/results/clientpositive/groupby12.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -66,7 +77,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key diff --git ql/src/test/results/clientpositive/groupby1_limit.q.out ql/src/test/results/clientpositive/groupby1_limit.q.out index 78a49ebf6c..1617da50e6 100644 --- ql/src/test/results/clientpositive/groupby1_limit.q.out +++ ql/src/test/results/clientpositive/groupby1_limit.q.out @@ -17,6 +17,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -40,7 +41,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator @@ -66,7 +66,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 
(type: double) Reduce Operator Tree: Select Operator @@ -88,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,7 +114,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key ORDER BY src.key LIMIT 5 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby1_map.q.out ql/src/test/results/clientpositive/groupby1_map.q.out index cc985a5def..82fe1f0d93 100644 --- ql/src/test/results/clientpositive/groupby1_map.q.out +++ ql/src/test/results/clientpositive/groupby1_map.q.out @@ -15,7 +15,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator 
@@ -71,7 +87,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby1_map_nomap.q.out ql/src/test/results/clientpositive/groupby1_map_nomap.q.out index cc985a5def..82fe1f0d93 100644 --- ql/src/test/results/clientpositive/groupby1_map_nomap.q.out +++ ql/src/test/results/clientpositive/groupby1_map_nomap.q.out @@ -15,7 +15,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -71,7 +87,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby1_map_skew.q.out ql/src/test/results/clientpositive/groupby1_map_skew.q.out index 116744a29e..44b4db70c8 100644 --- ql/src/test/results/clientpositive/groupby1_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby1_map_skew.q.out @@ -16,7 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -84,6 +85,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -96,7 +112,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby1_noskew.q.out ql/src/test/results/clientpositive/groupby1_noskew.q.out index 98c0d3c28e..ba4366dff8 100644 --- ql/src/test/results/clientpositive/groupby1_noskew.q.out +++ ql/src/test/results/clientpositive/groupby1_noskew.q.out @@ -15,7 +15,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -65,7 +76,34 @@ STAGE PLANS: name: default.dest_g1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest_g1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/groupby2_map.q.out ql/src/test/results/clientpositive/groupby2_map.q.out index 0dcd8109f1..262c5d8f75 100644 --- ql/src/test/results/clientpositive/groupby2_map.q.out +++ ql/src/test/results/clientpositive/groupby2_map.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,7 +89,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out index 64477dbfd2..581e958cd8 100644 --- ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,7 +89,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE 
+ File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) @@ -119,7 +162,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -163,6 +207,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -175,7 +234,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby2_map_skew.q.out ql/src/test/results/clientpositive/groupby2_map_skew.q.out index 
813ae5cb26..02f5b47460 100644 --- ql/src/test/results/clientpositive/groupby2_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby2_map_skew.q.out @@ -18,7 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -86,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,7 +114,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby2_noskew.q.out ql/src/test/results/clientpositive/groupby2_noskew.q.out index 5192db3966..29a71f1710 100644 --- ql/src/test/results/clientpositive/groupby2_noskew.q.out +++ ql/src/test/results/clientpositive/groupby2_noskew.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -66,7 +77,34 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out index 1c24213fba..f1ce8388d7 100644 --- ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -67,7 +78,34 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: 
string), c3 (type: int), c4 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby3.q.out ql/src/test/results/clientpositive/groupby3.q.out index 2ebeae450b..7c97174830 100644 --- ql/src/test/results/clientpositive/groupby3.q.out +++ ql/src/test/results/clientpositive/groupby3.q.out @@ -36,7 +36,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -73,6 +74,7 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: @@ -93,6 +95,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -105,7 +122,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, 
double, double, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/groupby3_map.q.out ql/src/test/results/clientpositive/groupby3_map.q.out index 07c122e2d9..edad22b93f 100644 --- ql/src/test/results/clientpositive/groupby3_map.q.out +++ ql/src/test/results/clientpositive/groupby3_map.q.out @@ -77,6 +77,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,7 +109,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, 
c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out index a4501f7f7a..20344640e6 100644 --- ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out @@ -81,6 +81,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -93,7 +113,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/groupby3_map_skew.q.out ql/src/test/results/clientpositive/groupby3_map_skew.q.out index e02bdeb904..e53e62c2ce 100644 --- ql/src/test/results/clientpositive/groupby3_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby3_map_skew.q.out @@ -100,6 +100,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: 
double)
+            outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9
+            Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll')
+              mode: complete
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+              Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -112,7 +132,12 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9
+          Column Types: double, double, double, double, double, double, double, double, double
+          Table: default.dest1
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT
diff --git ql/src/test/results/clientpositive/groupby3_noskew.q.out ql/src/test/results/clientpositive/groupby3_noskew.q.out
index 624fd2db54..1aa4cb6ce6 100644
--- ql/src/test/results/clientpositive/groupby3_noskew.q.out
+++ ql/src/test/results/clientpositive/groupby3_noskew.q.out
@@ -70,6 +70,22 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+            outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9
+            Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll')
+              mode: complete
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+              Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -82,7 +98,12 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9
+          Column Types: double, double, double, double, double, double, double, double, double
+          Table: default.dest1
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT
diff --git ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
index a1d403d6cf..bb964e66d6 100644
--- ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
+++ ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
@@ -74,6 +74,22 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double)
+            outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll')
+              mode: complete
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+              Statistics: Num rows: 1 Data size: 5008 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 1 Data size: 5008 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -86,7 +102,12 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11
+          Column Types: double, double, double, double, double, double, double, double, double, double, double
+          Table: default.dest1
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT
diff --git ql/src/test/results/clientpositive/groupby4.q.out ql/src/test/results/clientpositive/groupby4.q.out
index 3f77e47bd1..34f2099792 100644
--- ql/src/test/results/clientpositive/groupby4.q.out
+++ ql/src/test/results/clientpositive/groupby4.q.out
@@ -18,7 +18,9 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-5
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-4
 
 STAGE PLANS:
   Stage: Stage-1
@@ -72,6 +74,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: c1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -84,7 +96,56 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: c1 (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll')
+          mode: partial1
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: final
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1)
diff --git ql/src/test/results/clientpositive/groupby4_map.q.out ql/src/test/results/clientpositive/groupby4_map.q.out
index 97915e76f8..4ed785f0fa 100644
--- ql/src/test/results/clientpositive/groupby4_map.q.out
+++ ql/src/test/results/clientpositive/groupby4_map.q.out
@@ -53,6 +53,26 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int)
+            outputColumnNames: key
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              mode: complete
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col0 (type: struct)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -65,7 +85,12 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.dest1
 
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1)
 PREHOOK: type: QUERY
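Every hunk from here on follows the same recipe: the one-line Stats-Aggr Operator stage is replaced by a Stats Work stage carrying a Column Stats Desc, and a compute_stats pipeline is grafted onto the insert. The new plans can be reproduced interactively; a minimal sketch, assuming a Hive session where the q-file test tables (src, dest1) are loaded and column autogathering is switched on:

    SET hive.stats.column.autogather=true;
    EXPLAIN
    FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1);

The EXPLAIN output should then show the extra Select Operator / Group By Operator branch over compute_stats(key, 'hll') plus the Stats Work stage, as in the groupby4_map hunks above.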
diff --git ql/src/test/results/clientpositive/groupby4_map_skew.q.out ql/src/test/results/clientpositive/groupby4_map_skew.q.out
index ae83f7ac65..e6ae7e8c23 100644
--- ql/src/test/results/clientpositive/groupby4_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby4_map_skew.q.out
@@ -53,6 +53,26 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int)
+            outputColumnNames: key
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              mode: complete
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: _col0 (type: struct)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -65,7 +85,12 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.dest1
 
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1)
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/groupby4_noskew.q.out ql/src/test/results/clientpositive/groupby4_noskew.q.out
index c7db0d7016..0fa7046230 100644
--- ql/src/test/results/clientpositive/groupby4_noskew.q.out
+++ ql/src/test/results/clientpositive/groupby4_noskew.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -49,6 +50,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: c1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -61,7 +72,34 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: c1 (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll')
+          mode: complete
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1)
diff --git ql/src/test/results/clientpositive/groupby5.q.out ql/src/test/results/clientpositive/groupby5.q.out
index 9bf01ee51b..1677ab3a02 100644
--- ql/src/test/results/clientpositive/groupby5.q.out
+++ ql/src/test/results/clientpositive/groupby5.q.out
@@ -22,7 +22,9 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-5
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-4
 
 STAGE PLANS:
   Stage: Stage-1
@@ -84,6 +86,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -96,7 +108,56 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE dest1
 SELECT src.key, sum(substr(src.value,5))
diff --git ql/src/test/results/clientpositive/groupby5_map.q.out ql/src/test/results/clientpositive/groupby5_map.q.out
index 5fbd3d7dad..d04eb44b3c 100644
--- ql/src/test/results/clientpositive/groupby5_map.q.out
+++ ql/src/test/results/clientpositive/groupby5_map.q.out
@@ -55,6 +55,26 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int)
+            outputColumnNames: key
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              mode: complete
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: struct)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -67,7 +87,12 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.dest1
 
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key)
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/groupby5_map_skew.q.out ql/src/test/results/clientpositive/groupby5_map_skew.q.out
index 60b010b501..6fa3fc1188 100644
--- ql/src/test/results/clientpositive/groupby5_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby5_map_skew.q.out
@@ -55,6 +55,26 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int)
+            outputColumnNames: key
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll')
+              mode: complete
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: struct)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -65,7 +85,12 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.dest1
 
 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key)
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/groupby5_noskew.q.out ql/src/test/results/clientpositive/groupby5_noskew.q.out
index 612a0f6112..9cd1cccdcf 100644
--- ql/src/test/results/clientpositive/groupby5_noskew.q.out
+++ ql/src/test/results/clientpositive/groupby5_noskew.q.out
@@ -21,7 +21,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -59,6 +60,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -71,7 +82,34 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: INSERT OVERWRITE TABLE dest1
 SELECT src.key, sum(substr(src.value,5))
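Each grafted stage feeds the compute_stats UDAF, which condenses a column into a summary struct (counts, min/max, null count, NDV); the 'hll' argument seen in the aggregations above selects a HyperLogLog sketch for the NDV estimate. The aggregate can also be exercised by hand; a hedged sketch, again assuming the standard src test table:

    SELECT compute_stats(key, 'hll'),
           compute_stats(value, 'hll')
    FROM src;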
diff --git ql/src/test/results/clientpositive/groupby6.q.out ql/src/test/results/clientpositive/groupby6.q.out
index b79022405b..a8cd73852f 100644
--- ql/src/test/results/clientpositive/groupby6.q.out
+++ ql/src/test/results/clientpositive/groupby6.q.out
@@ -18,7 +18,9 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-5
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-4
 
 STAGE PLANS:
   Stage: Stage-1
@@ -72,6 +74,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: c1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -84,7 +96,56 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: c1 (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll')
+          mode: partial1
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: final
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1)
diff --git ql/src/test/results/clientpositive/groupby6_map.q.out ql/src/test/results/clientpositive/groupby6_map.q.out
index 4ba3772298..1d89c1a7fc 100644
--- ql/src/test/results/clientpositive/groupby6_map.q.out
+++ ql/src/test/results/clientpositive/groupby6_map.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -54,6 +55,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: c1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(c1, 'hll')
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -66,7 +82,34 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1)
diff --git ql/src/test/results/clientpositive/groupby6_map_skew.q.out ql/src/test/results/clientpositive/groupby6_map_skew.q.out
index 5141c0d9b3..bc7ee51593 100644
--- ql/src/test/results/clientpositive/groupby6_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby6_map_skew.q.out
@@ -18,7 +18,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -77,6 +78,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: c1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(c1, 'hll')
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -89,7 +105,34 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: final
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1)
diff --git ql/src/test/results/clientpositive/groupby6_noskew.q.out ql/src/test/results/clientpositive/groupby6_noskew.q.out
index fd796c7b74..bf818a0935 100644
--- ql/src/test/results/clientpositive/groupby6_noskew.q.out
+++ ql/src/test/results/clientpositive/groupby6_noskew.q.out
@@ -17,7 +17,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -49,6 +50,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: string)
+            outputColumnNames: c1
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -61,7 +72,34 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: c1 (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll')
+          mode: complete
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1)
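The skew and noskew variants shape the grafted stats job differently: with skew handling on, the column stats are computed in two MapReduce stages, a partial1 aggregation partitioned on rand() followed by a final merge, while the noskew plans finish in a single complete (or hash plus mergepartial) Group By. A sketch of flipping between the two shapes with the groupby6 query shown above:

    SET hive.stats.column.autogather=true;
    SET hive.groupby.skewindata=true;  -- two-stage stats: partial1 on rand(), then final
    EXPLAIN
    FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1);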
diff --git ql/src/test/results/clientpositive/groupby7_map.q.out ql/src/test/results/clientpositive/groupby7_map.q.out
index 0ef29cd29f..76d1aa4f9b 100644
--- ql/src/test/results/clientpositive/groupby7_map.q.out
+++ ql/src/test/results/clientpositive/groupby7_map.q.out
@@ -27,10 +27,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -90,6 +92,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: true
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -102,13 +119,48 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string)
               sort order: +
               Map-reduce partition columns: _col0 (type: string)
@@ -133,6 +185,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: true
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -144,8 +211,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-5
-    Stats-Aggr Operator
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out
index 7c3b033a62..4c477974b4 100644
--- ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out
+++ ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out
@@ -27,9 +27,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -69,6 +71,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: true
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Group By Operator
           aggregations: sum(VALUE._col0)
           keys: KEY._col0 (type: string)
@@ -87,6 +104,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: true
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -99,7 +131,42 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
 
   Stage: Stage-1
     Move Operator
@@ -111,8 +178,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-4
-    Stats-Aggr Operator
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby7_map_skew.q.out ql/src/test/results/clientpositive/groupby7_map_skew.q.out
index 4bfa52ed89..a98511a002 100644
--- ql/src/test/results/clientpositive/groupby7_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby7_map_skew.q.out
@@ -28,11 +28,13 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-9
+  Stage-5 depends on stages: Stage-3
+  Stage-8 depends on stages: Stage-1, Stage-5, Stage-9
+  Stage-6 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-6
+  Stage-1 depends on stages: Stage-7
+  Stage-9 depends on stages: Stage-7
 
 STAGE PLANS:
   Stage: Stage-2
@@ -116,6 +118,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: true
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -128,13 +145,48 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-8
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: rand() (type: double)
@@ -154,7 +206,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -183,6 +235,21 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: true
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -194,8 +261,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
+  Stage: Stage-9
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
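In the multi-insert cases each destination gets its own compute_stats pipeline and its own Stats Work stage (default.dest1 and default.dest2 above), so the stage graph roughly doubles. Once such a plan has run, the gathered column statistics are visible per column; a sketch, assuming the dest tables exist as in these tests:

    DESCRIBE FORMATTED dest1 key;
    DESCRIBE FORMATTED dest2 value;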
diff --git ql/src/test/results/clientpositive/groupby7_noskew.q.out ql/src/test/results/clientpositive/groupby7_noskew.q.out
index 6178f58f7e..7309ec281b 100644
--- ql/src/test/results/clientpositive/groupby7_noskew.q.out
+++ ql/src/test/results/clientpositive/groupby7_noskew.q.out
@@ -27,10 +27,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -78,6 +80,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: true
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -90,13 +102,48 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: key (type: string)
              sort order: +
              Map-reduce partition columns: key (type: string)
@@ -121,6 +168,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: true
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -132,8 +189,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-5
-    Stats-Aggr Operator
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
diff --git ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out
index f38c428781..486979fef0 100644
--- ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out
+++ ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out
@@ -28,10 +28,12 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-8
+  Stage-5 depends on stages: Stage-3
+  Stage-7 depends on stages: Stage-1, Stage-5, Stage-8
+  Stage-6 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-6
+  Stage-8 depends on stages: Stage-6
 
 STAGE PLANS:
   Stage: Stage-2
@@ -108,6 +110,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: true
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -120,13 +132,48 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-7
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
@@ -152,6 +199,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: true
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -163,8 +220,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-6
-    Stats-Aggr Operator
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10
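The autogathered stats make a separate post-insert collection pass unnecessary; previously, equivalent column statistics for these targets had to be requested explicitly, along the lines of:

    ANALYZE TABLE dest1 COMPUTE STATISTICS FOR COLUMNS;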
diff --git ql/src/test/results/clientpositive/groupby8.q.out ql/src/test/results/clientpositive/groupby8.q.out
index 1856a9252b..03c6a18b97 100644
--- ql/src/test/results/clientpositive/groupby8.q.out
+++ ql/src/test/results/clientpositive/groupby8.q.out
@@ -28,11 +28,15 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-0, Stage-6, Stage-11
+  Stage-5 depends on stages: Stage-3
   Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-9 depends on stages: Stage-1, Stage-6, Stage-11
+  Stage-7 depends on stages: Stage-2
+  Stage-8 depends on stages: Stage-7
+  Stage-1 depends on stages: Stage-8
+  Stage-10 depends on stages: Stage-8
+  Stage-11 depends on stages: Stage-10
 
 STAGE PLANS:
   Stage: Stage-2
@@ -103,6 +107,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -115,13 +129,70 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-9
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest2
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: key (type: string), substr(value, 5) (type: string)
              sort order: ++
              Map-reduce partition columns: key (type: string)
@@ -140,7 +211,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -169,6 +240,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest2
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -180,8 +261,49 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2
 
-  Stage: Stage-7
-    Stats-Aggr Operator
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key
@@ -849,11 +971,15 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-0, Stage-6, Stage-11
+  Stage-5 depends on stages: Stage-3
   Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-9 depends on stages: Stage-1, Stage-6, Stage-11
+  Stage-7 depends on stages: Stage-2
+  Stage-8 depends on stages: Stage-7
+  Stage-1 depends on stages: Stage-8
+  Stage-10 depends on stages: Stage-8
+  Stage-11 depends on stages: Stage-10
 
 STAGE PLANS:
   Stage: Stage-2
@@ -924,6 +1050,16 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 name: default.dest1
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: string)
+            outputColumnNames: key, value
+            Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -936,13 +1072,70 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order: 
+              Map-reduce partition columns: rand() (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: key (type: int), value (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll')
+          mode: partial1
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-6
+    Map Reduce
+
Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: string) @@ -961,7 +1154,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -990,6 +1183,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1001,8 +1204,49 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/groupby8_map.q.out ql/src/test/results/clientpositive/groupby8_map.q.out index f683a8ba71..5022edbe75 100644 --- ql/src/test/results/clientpositive/groupby8_map.q.out +++ ql/src/test/results/clientpositive/groupby8_map.q.out @@ -27,9 +27,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -68,6 +70,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -86,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,7 +130,42 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 Stage: Stage-1 Move Operator @@ -110,8 +177,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/groupby8_map_skew.q.out ql/src/test/results/clientpositive/groupby8_map_skew.q.out index 5e60d3e924..51cfb9b03a 100644 --- ql/src/test/results/clientpositive/groupby8_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby8_map_skew.q.out @@ -28,11 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-8 depends on stages: Stage-1, Stage-5, Stage-9 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -115,6 +117,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -127,13 +144,48 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) @@ -152,7 +204,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -181,6 +233,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -192,8 +259,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 - Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/groupby8_noskew.q.out ql/src/test/results/clientpositive/groupby8_noskew.q.out index f683a8ba71..eabc90b3a7 100644 --- ql/src/test/results/clientpositive/groupby8_noskew.q.out +++ ql/src/test/results/clientpositive/groupby8_noskew.q.out @@ -27,9 +27,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -68,6 +70,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -86,6 +98,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,7 +120,42 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 Stage: Stage-1 Move Operator @@ -110,8 +167,27 @@ STAGE 
PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/groupby9.q.out ql/src/test/results/clientpositive/groupby9.q.out index 15ea1857c0..2daa452479 100644 --- ql/src/test/results/clientpositive/groupby9.q.out +++ ql/src/test/results/clientpositive/groupby9.q.out @@ -27,10 +27,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -89,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,13 +118,48 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -131,6 +183,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -142,8 +209,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -811,10 +897,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -873,6 +961,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -885,13 +988,48 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -915,6 +1053,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -926,8 +1079,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -1595,10 +1767,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -1657,6 +1831,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1669,13 +1858,48 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -1699,6 +1923,21 @@ 
STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1710,8 +1949,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -2379,10 +2637,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -2442,6 +2702,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -2454,13 +2729,48 @@ STAGE PLANS: name: 
default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -2485,6 +2795,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -2496,8 +2821,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -3165,10 +3509,12 @@ POSTHOOK: type: QUERY 
STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -3227,6 +3573,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -3239,13 +3600,48 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -3269,6 +3665,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -3280,8 +3691,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/groupby_complex_types.q.out ql/src/test/results/clientpositive/groupby_complex_types.q.out index b16a4ade68..107eea3a1d 100644 --- ql/src/test/results/clientpositive/groupby_complex_types.q.out +++ ql/src/test/results/clientpositive/groupby_complex_types.q.out @@ -127,7 +127,8 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-5 Map Reduce @@ -166,7 +167,8 @@ STAGE PLANS: name: default.dest2 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-7 Map Reduce @@ -205,7 +207,8 @@ STAGE PLANS: name: default.dest3 Stage: Stage-8 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) diff --git ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out index caa5395031..bfddf74adc 100644 --- ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out @@ -126,7 +126,8 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-5 Map Reduce @@ -191,7 +192,8 @@ STAGE PLANS: name: default.dest2 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 diff --git ql/src/test/results/clientpositive/groupby_cube1.q.out ql/src/test/results/clientpositive/groupby_cube1.q.out index 9acccf3b45..94f1ac0528 100644 --- ql/src/test/results/clientpositive/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -551,11 +551,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: 
Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-8 depends on stages: Stage-1, Stage-5, Stage-9 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -640,6 +642,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -652,13 +669,48 @@ STAGE PLANS: name: default.t2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) @@ -678,7 +730,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -708,6 +760,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), 
compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -719,8 +786,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 - Stage: Stage-7 - Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube diff --git ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out index 1e13288c9a..9a6457cef2 100644 --- ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out +++ ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out @@ -33,10 +33,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -92,6 +94,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,13 +121,48 @@ STAGE PLANS: name: default.t1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t1 Stage: 
Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) @@ -130,6 +182,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -141,6 +208,25 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/groupby_duplicate_key.q.out ql/src/test/results/clientpositive/groupby_duplicate_key.q.out index 6e19930927..d1d7f8d269 100644 --- ql/src/test/results/clientpositive/groupby_duplicate_key.q.out +++ ql/src/test/results/clientpositive/groupby_duplicate_key.q.out @@ -143,7 +143,8 @@ STAGE PLANS: name: default.dummy Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: 
create table dummy as select distinct key, "X" as dummy1, "X" as dummy2 from src tablesample (10 rows) diff --git ql/src/test/results/clientpositive/groupby_map_ppr.q.out ql/src/test/results/clientpositive/groupby_map_ppr.q.out index 0e5b394215..1a3d0e6448 100644 --- ql/src/test/results/clientpositive/groupby_map_ppr.q.out +++ ql/src/test/results/clientpositive/groupby_map_ppr.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +199,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -228,8 +256,83 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + 
Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out index dbcef22473..02cb101d0a 100644 --- ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +199,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -228,8 +256,83 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2168 Basic 
stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2216 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2216 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out index 5f02b04c38..be442dc1c9 100644 --- ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out +++ ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out @@ -27,10 +27,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -89,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,13 +118,48 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double) @@ -131,6 +183,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -142,8 +209,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 856 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: 
mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table dest1 select key, count(distinct value) group by key diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out index 256784d3d7..13ec761d4e 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out @@ -54,10 +54,13 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-3 Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -97,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -118,6 +136,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter 
Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -139,6 +172,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -151,7 +199,34 @@ STAGE PLANS: name: default.dest_g4 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -163,8 +238,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -176,8 +278,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-6 - Stats-Aggr Operator + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) @@ -276,16 +405,21 @@ STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-5 Stage-0 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-0 Stage-9 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-10 - Stage-11 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-9 - Stage-12 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-5 + Stage-12 depends on stages: Stage-5 + Stage-13 depends on stages: Stage-12 + Stage-3 depends on stages: Stage-13 + Stage-14 depends on stages: Stage-3 + Stage-15 depends on stages: Stage-13 + Stage-4 depends on stages: Stage-12 + Stage-16 depends on stages: Stage-4 + Stage-17 depends on stages: Stage-12 STAGE PLANS: Stage: Stage-5 @@ -335,6 +469,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -356,6 +505,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -377,6 +541,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -389,7 +568,34 @@ STAGE PLANS: name: default.dest_g4 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -401,8 +607,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 - Stage: Stage-7 - Stats-Aggr Operator + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -414,10 +647,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-8 - Stats-Aggr Operator + Stage: Stage-10 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 - Stage: Stage-9 + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -467,8 +727,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + 
outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-10 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -476,7 +751,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Select Operator @@ -498,6 +772,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Move Operator @@ -509,8 +798,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 - Stage: Stage-11 - Stats-Aggr Operator + Stage: Stage-14 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h2 + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Move Operator @@ -522,8 +838,35 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 - Stage: Stage-12 - Stats-Aggr Operator + Stage: Stage-16 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h3 + + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out index 012b2114b4..000caa660f 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out @@ -27,9 +27,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -71,6 +73,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -92,6 +109,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select 
Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,7 +136,42 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_g2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, int + Table: default.dest_g3 Stage: Stage-1 Move Operator @@ -116,8 +183,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index e41d9ef021..d356bf253d 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -39,9 +39,11 @@ 
POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -83,6 +85,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -104,6 +121,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -116,7 +148,42 @@ STAGE PLANS: name: default.e1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 Stage: Stage-1 Move 
Operator @@ -128,8 +195,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table e1 @@ -205,9 +291,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -249,6 +337,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -270,6 +373,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -282,7 +400,42 @@ STAGE PLANS: name: default.e1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats 
Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 Stage: Stage-1 Move Operator @@ -294,8 +447,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table e1 @@ -371,9 +543,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -415,6 +589,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) 
Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -436,6 +625,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -448,7 +652,42 @@ STAGE PLANS: name: default.e1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 Stage: Stage-1 Move Operator @@ -460,8 +699,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table e1 @@ -537,9 +795,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - 
   Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -581,6 +841,21 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.e1
+        Select Operator
+          expressions: _col0 (type: string), _col1 (type: int)
+          outputColumnNames: key, count
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Filter Operator
           predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean)
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -602,6 +877,21 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.e2
+        Select Operator
+          expressions: _col0 (type: string), _col1 (type: int)
+          outputColumnNames: key, count
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -614,7 +904,42 @@ STAGE PLANS:
               name: default.e1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e2
 
   Stage: Stage-1
     Move Operator
@@ -626,8 +951,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e2
 
-  Stage: Stage-4
-    Stats-Aggr Operator
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: from src
 insert overwrite table e1
diff --git ql/src/test/results/clientpositive/groupby_position.q.out ql/src/test/results/clientpositive/groupby_position.q.out
index 2ec9ecd614..97f90d09e5 100644
--- ql/src/test/results/clientpositive/groupby_position.q.out
+++ ql/src/test/results/clientpositive/groupby_position.q.out
@@ -27,10 +27,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -87,6 +89,21 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.testtable1
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: string)
+          outputColumnNames: key, value
+          Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -99,13 +116,48 @@ STAGE PLANS:
               name: default.testtable1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.testtable1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, string, string
+          Table: default.testtable2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
@@ -129,6 +181,21 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.testtable2
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+          outputColumnNames: key, val1, val2
+          Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -140,8 +207,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.testtable2
 
-  Stage: Stage-5
-    Stats-Aggr Operator
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
@@ -217,10 +303,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -277,6 +365,21 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.testtable1
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: string)
+          outputColumnNames: key, value
+          Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -289,13 +392,48 @@ STAGE PLANS:
               name: default.testtable1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.testtable1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: int, string, string
+          Table: default.testtable2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
               sort order: +++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
@@ -319,6 +457,21 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.testtable2
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+          outputColumnNames: key, val1, val2
+          Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -330,8 +483,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.testtable2
 
-  Stage: Stage-5
-    Stats-Aggr Operator
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM SRC
 INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1
diff --git ql/src/test/results/clientpositive/groupby_ppr.q.out ql/src/test/results/clientpositive/groupby_ppr.q.out
index e645f5f598..4cf530e8c5 100644
--- ql/src/test/results/clientpositive/groupby_ppr.q.out
+++ ql/src/test/results/clientpositive/groupby_ppr.q.out
@@ -23,7 +23,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -191,6 +192,28 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
+          outputColumnNames: key, c1, c2
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  column.name.delimiter ,
+                  columns key,c1,c2
+                  columns.types string,int,string
+                  escape.delim \
+                  serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -221,8 +244,83 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, c1, c2
+          Column Types: string, int, string
+          Table: default.dest1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: key (type: string), c1 (type: int), c2 (type: string)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns key,c1,c2
+              columns.types string,int,string
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns key,c1,c2
+                columns.types string,int,string
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
 #### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
 PREHOOK: query: FROM srcpart src
 INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
index f300095bd7..ff5e74c24c 100644
--- ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
+++ ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
@@ -23,7 +23,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -191,6 +192,28 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+          outputColumnNames: key, c1, c2, c3, c4
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  column.name.delimiter ,
+                  columns key,c1,c2,c3,c4
+                  columns.types string,int,string,int,int
+                  escape.delim \
+                  serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -221,8 +244,83 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, c1, c2, c3, c4
+          Column Types: string, int, string, int, int
+          Table: default.dest1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns key,c1,c2,c3,c4
+              columns.types string,int,string,int,int
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns key,c1,c2,c3,c4
+                columns.types string,int,string,int,int
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
 #### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4
+                  columns.types struct:struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
 PREHOOK: query: FROM srcpart src
 INSERT OVERWRITE TABLE dest1
@@ -284,7 +382,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -431,7 +530,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
              columns key,c1,c2,c3,c4
@@ -452,6 +551,28 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+          outputColumnNames: key, c1, c2, c3, c4
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  column.name.delimiter ,
+                  columns key,c1,c2,c3,c4
+                  columns.types string,int,string,int,int
+                  escape.delim \
+                  serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -462,7 +583,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,c1,c2,c3,c4
@@ -482,8 +603,87 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, c1, c2, c3, c4
+          Column Types: string, int, string, int, int
+          Table: default.dest1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
 #### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns key,c1,c2,c3,c4
+              columns.types string,int,string,int,int
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns key,c1,c2,c3,c4
+                columns.types string,int,string,int,int
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll')
+          mode: complete
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 2232 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4
+            Statistics: Num rows: 1 Data size: 2232 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 1 Data size: 2232 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4
+                    columns.types struct:struct:struct:struct:struct
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
 
 PREHOOK: query: FROM srcpart src
 INSERT OVERWRITE TABLE dest1
diff --git ql/src/test/results/clientpositive/groupby_rollup1.q.out ql/src/test/results/clientpositive/groupby_rollup1.q.out
index e050d0af29..5ccf8f2285 100644
--- ql/src/test/results/clientpositive/groupby_rollup1.q.out
+++ ql/src/test/results/clientpositive/groupby_rollup1.q.out
@@ -396,11 +396,13 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
-  Stage-5 depends on stages: Stage-2
-  Stage-6 depends on stages: Stage-5
-  Stage-1 depends on stages: Stage-6
-  Stage-7 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-9
+  Stage-5 depends on stages: Stage-3
+  Stage-8 depends on stages: Stage-1, Stage-5, Stage-9
+  Stage-6 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-6
+  Stage-1 depends on stages: Stage-7
+  Stage-9 depends on stages: Stage-7
 
 STAGE PLANS:
   Stage: Stage-2
@@ -485,6 +487,21 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.t2
+        Select Operator
+          expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+          outputColumnNames: key1, key2, val
+          Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -497,13 +514,48 @@ STAGE PLANS:
               name: default.t2
 
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key1, key2, val
+          Column Types: string, string, int
+          Table: default.t2
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-8
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key1, key2, val
+          Column Types: string, string, int
+          Table: default.t3
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
               sort order: +++
               Map-reduce partition columns: rand() (type: double)
@@ -523,7 +575,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-6
+  Stage: Stage-7
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -553,6 +605,21 @@ STAGE PLANS:
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: default.t3
+        Select Operator
+          expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+          outputColumnNames: key1, key2, val
+          Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -564,8 +631,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.t3
 
-  Stage: Stage-7
-    Stats-Aggr Operator
+  Stage: Stage-9
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: FROM T1
 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup
diff --git ql/src/test/results/clientpositive/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
index 1f12c52f6a..cbfd6bf627 100644
--- ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
+++ ql/src/test/results/clientpositive/groupby_sort_1_23.q.out
@@ -106,6 +106,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: int)
+          outputColumnNames: key, cnt
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -161,6 +177,35 @@ STAGE PLANS:
             name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -200,8 +245,14 @@ STAGE PLANS:
               name: default.outputtbl1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
 
   Stage: Stage-3
     Map Reduce
@@ -427,7 +478,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -555,6 +607,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
+          outputColumnNames: key1, key2, cnt
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    column.name.delimiter ,
+                    columns _col0,_col1,_col2
+                    columns.types struct,struct,struct
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -585,8 +664,83 @@ STAGE PLANS:
               name: default.outputtbl2
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, string, int
+          Table: default.outputtbl2
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2
+              columns.types struct,struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2
+                columns.types struct,struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2
 SELECT key, val, count(1) FROM T1 GROUP BY key, val
@@ -666,7 +820,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -687,6 +841,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: int)
+          outputColumnNames: key, cnt
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -742,6 +912,35 @@ STAGE PLANS:
             name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -761,7 +960,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -781,8 +980,14 @@ STAGE PLANS:
               name: default.outputtbl1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
 
   Stage: Stage-3
     Map Reduce
@@ -798,7 +1003,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -828,7 +1033,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -849,7 +1054,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -885,7 +1090,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -915,7 +1120,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -936,7 +1141,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -1040,7 +1245,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -1061,6 +1266,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: int)
+          outputColumnNames: key, cnt
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -1116,6 +1337,35 @@ STAGE PLANS:
             name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -1135,7 +1385,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -1155,8 +1405,14 @@ STAGE PLANS:
               name: default.outputtbl1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
 
   Stage: Stage-3
     Map Reduce
@@ -1172,7 +1428,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -1202,7 +1458,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -1223,7 +1479,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -1259,7 +1515,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -1289,7 +1545,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -1310,7 +1566,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -1443,6 +1699,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+          outputColumnNames: key1, key2, cnt
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+              auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -1498,6 +1770,35 @@ STAGE PLANS:
             name: default.t1
       Truncated Path -> Alias:
         /t1 [t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -1537,8 +1838,14 @@ STAGE PLANS:
               name: default.outputtbl3
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, int, int
+          Table: default.outputtbl3
+          Is Table Level Stats: true
 
   Stage: Stage-3
     Map Reduce
@@ -1765,7 +2072,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1893,6 +2201,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+          outputColumnNames: key1, key2, key3, cnt
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    column.name.delimiter ,
+                    columns _col0,_col1,_col2,_col3
+                    columns.types struct,struct,struct,struct
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -1923,41 +2258,116 @@ STAGE PLANS:
               name: default.outputtbl4
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, key3, cnt
+          Column Types: int, int, string, int
+          Table: default.outputtbl4
+          Is Table Level Stats: true
 
-PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Output: default@outputtbl4
-POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Output: default@outputtbl4
-POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ]
-POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
-POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
-POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
-PREHOOK: query: SELECT * FROM outputTbl4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@outputtbl4
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM outputTbl4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@outputtbl4
+      Path -> Partition:
 #### A masked pattern was here ####
-1	1	11	1
-2	1	12	1
-3	1	13	1
-7	1	17	1
-8	1	18	1
-8	1	28	1
-PREHOOK: query: EXPLAIN EXTENDED
-INSERT OVERWRITE TABLE outputTbl3
-SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
-PREHOOK: type: QUERY
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2,_col3
+              columns.types struct,struct,struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2,_col3
+                columns.types struct,struct,struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl4
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl4
+POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
+POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+1	1	11	1
+2	1	12	1
+3	1	13	1
+7	1	17	1
+8	1	18	1
+8	1	28	1
+PREHOOK: query: EXPLAIN EXTENDED
+INSERT OVERWRITE TABLE outputTbl3
+SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
+PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl3
 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
@@ -1965,7 +2375,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -2072,7 +2483,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key1,key2,cnt
@@ -2093,6 +2504,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+          outputColumnNames: key1, key2, cnt
+          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    column.name.delimiter ,
+                    columns _col0,_col1,_col2
+                    columns.types struct,struct,struct
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -2103,7 +2541,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,cnt
@@ -2123,8 +2561,83 @@ STAGE PLANS:
               name: default.outputtbl3
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, int, int
+          Table: default.outputtbl3
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2
+              columns.types struct,struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2
+                columns.types struct,struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3
 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
@@ -2167,7 +2680,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -2284,7 +2798,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2305,6 +2819,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+        Select Operator
+          expressions: _col0 (type: int), _col1 (type: int)
+          outputColumnNames: key, cnt
+          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    column.name.delimiter ,
+                    columns _col0,_col1
+                    columns.types struct,struct
+                    escape.delim \
+                    serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -2315,7 +2856,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2335,8 +2876,83 @@ STAGE PLANS:
               name: default.outputtbl1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
 #### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order:
+              sort order:
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from @@ -2428,7 +3044,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2449,6 +3065,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2480,7 +3112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2501,6 +3133,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2556,6 +3204,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1, null-subquery2:$hdt$_0-subquery2:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + 
hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2575,7 +3252,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2595,8 +3272,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -2612,7 +3295,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2642,7 +3325,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2663,7 +3346,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2699,7 +3382,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2729,7 +3412,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2750,7 +3433,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2990,7 +3673,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} 
bucket_count -1 column.name.delimiter , columns key,cnt @@ -3011,6 +3694,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -3030,7 +3729,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3051,6 +3750,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3129,6 +3844,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3148,7 +3892,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 
column.name.delimiter , columns key,cnt @@ -3168,8 +3912,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -3185,7 +3935,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3215,7 +3965,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3236,7 +3986,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3272,7 +4022,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3302,7 +4052,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3323,7 +4073,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3408,7 +4158,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3540,7 +4291,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3561,6 +4312,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), 
compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3571,7 +4349,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3591,8 +4369,83 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + 
hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM @@ -3940,7 +4793,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3979,7 +4833,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4002,7 +4856,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4048,7 +4902,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4069,6 +4923,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -4079,7 +4960,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4095,12 +4976,87 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-2 - Stats-Aggr Operator + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-2 + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key @@ -4178,7 +5134,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4199,6 +5155,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4208,7 +5180,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4231,7 +5203,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4254,6 +5226,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4273,7 +5274,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4293,8 +5294,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -4310,7 +5317,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4340,7 +5347,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4361,7 +5368,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4397,7 +5404,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4427,7 +5434,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4448,7 +5455,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4584,6 +5591,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(key4, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4593,7 +5616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4616,7 +5639,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4639,6 +5662,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4678,8 +5730,14 @@ STAGE PLANS: name: default.outputtbl5 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -4947,7 +6005,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4968,6 +6026,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 
(type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4977,7 +6051,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5000,7 +6074,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5023,6 +6097,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5042,7 +6145,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5062,8 +6165,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -5079,7 +6188,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5109,7 +6218,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5130,7 +6239,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5166,7 +6275,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5196,7 +6305,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5217,7 +6326,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5338,7 +6447,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5359,6 +6468,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5368,7 +6493,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} 
SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5391,7 +6516,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5414,6 +6539,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5433,7 +6587,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5453,8 +6607,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -5470,7 +6630,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5500,7 +6660,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5521,7 +6681,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5557,7 +6717,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5587,7 +6747,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5608,7 +6768,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5703,9 +6863,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5753,6 +6915,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5772,6 +6949,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5784,7 +6976,42 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 Stage: Stage-1 Move Operator @@ -5796,8 +7023,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key @@ -5858,9 +7104,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5911,6 +7159,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 
Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5930,6 +7193,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5942,7 +7220,42 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 Stage: Stage-1 Move Operator @@ -5954,8 +7267,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: 
FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key diff --git ql/src/test/results/clientpositive/groupby_sort_2.q.out ql/src/test/results/clientpositive/groupby_sort_2.q.out index bb6273e37d..25ed6b6f8f 100644 --- ql/src/test/results/clientpositive/groupby_sort_2.q.out +++ ql/src/test/results/clientpositive/groupby_sort_2.q.out @@ -45,7 +45,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,7 +118,34 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: val, cnt + Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT val, count(1) FROM T1 GROUP BY val diff --git ql/src/test/results/clientpositive/groupby_sort_3.q.out ql/src/test/results/clientpositive/groupby_sort_3.q.out index 2dae25d085..be9a131014 100644 --- ql/src/test/results/clientpositive/groupby_sort_3.q.out +++ ql/src/test/results/clientpositive/groupby_sort_3.q.out @@ -81,6 +81,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num 
rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -102,7 +128,12 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce @@ -216,6 +247,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -237,7 +294,12 @@ STAGE PLANS: name: default.outputtbl2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/groupby_sort_4.q.out ql/src/test/results/clientpositive/groupby_sort_4.q.out index 70e8ac7d8d..c2ab1bdb91 100644 --- ql/src/test/results/clientpositive/groupby_sort_4.q.out +++ ql/src/test/results/clientpositive/groupby_sort_4.q.out @@ -45,7 +45,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,7 +118,34 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key @@ -148,7 +191,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -192,6 +236,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -204,7 +263,34 @@ STAGE PLANS: name: default.outputtbl2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: 
NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val diff --git ql/src/test/results/clientpositive/groupby_sort_5.q.out ql/src/test/results/clientpositive/groupby_sort_5.q.out index db18928faa..7a7b8df4aa 100644 --- ql/src/test/results/clientpositive/groupby_sort_5.q.out +++ ql/src/test/results/clientpositive/groupby_sort_5.q.out @@ -81,6 +81,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -102,7 +128,12 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce @@ -244,6 +275,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -265,7 +322,12 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce @@ -379,7 +441,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -424,6 +487,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -436,7 +514,34 @@ STAGE PLANS: name: default.outputtbl2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, count(1) FROM T1 GROUP BY key diff --git ql/src/test/results/clientpositive/groupby_sort_6.q.out ql/src/test/results/clientpositive/groupby_sort_6.q.out index 
60019e7f99..91cd119b04 100644 --- ql/src/test/results/clientpositive/groupby_sort_6.q.out +++ ql/src/test/results/clientpositive/groupby_sort_6.q.out @@ -25,7 +25,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -102,6 +103,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -132,8 +160,83 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num 
rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key @@ -175,7 +278,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -231,7 +335,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -252,6 +356,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -262,7 +393,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -282,8 +413,83 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### + Path -> Partition: +#### A 
masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key @@ -316,7 +522,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -419,7 +626,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -440,6 +647,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + 
columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -450,7 +684,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -470,8 +704,83 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key diff --git ql/src/test/results/clientpositive/groupby_sort_7.q.out ql/src/test/results/clientpositive/groupby_sort_7.q.out 
index 9d535e25b0..a9e469b948 100644 --- ql/src/test/results/clientpositive/groupby_sort_7.q.out +++ ql/src/test/results/clientpositive/groupby_sort_7.q.out @@ -84,6 +84,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -105,7 +131,12 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index fba8adbd49..f15c0f90ab 100644 --- ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -106,6 +106,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +177,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 
Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -200,8 +245,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -428,7 +479,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -624,6 +676,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -654,8 +733,83 @@ STAGE PLANS: name: default.outputtbl2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val @@ -735,7 +889,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -756,6 +910,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -811,6 +981,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here 
#### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -830,7 +1029,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -850,8 +1049,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -867,7 +1072,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -897,7 +1102,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -918,7 +1123,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -954,7 +1159,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -984,7 +1189,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1005,7 +1210,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1109,7 +1314,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1130,6 +1335,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1185,6 +1406,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1204,7 +1454,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1224,8 +1474,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -1241,7 +1497,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1271,7 +1527,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1292,7 +1548,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1328,7 +1584,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1358,7 +1614,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1379,7 +1635,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1512,6 +1768,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1567,6 +1839,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -1606,8 +1907,14 @@ STAGE PLANS:
               name: default.outputtbl3

   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, int, int
+          Table: default.outputtbl3
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -1835,7 +2142,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -2031,6 +2339,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+            outputColumnNames: key1, key2, key3, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2,_col3
+                      columns.types struct,struct,struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -2061,35 +2396,110 @@ STAGE PLANS:
               name: default.outputtbl4

   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, key3, cnt
+          Column Types: int, int, string, int
+          Table: default.outputtbl4
+          Is Table Level Stats: true
-PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
-PREHOOK: type: QUERY
-PREHOOK: Input: default@t1
-PREHOOK: Output: default@outputtbl4
-POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
-SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@t1
-POSTHOOK: Output: default@outputtbl4
-POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ]
-POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
-POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
-POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
-PREHOOK: query: SELECT * FROM outputTbl4
-PREHOOK: type: QUERY
-PREHOOK: Input: default@outputtbl4
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
#### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM outputTbl4
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@outputtbl4
+      Path -> Partition:
#### A masked pattern was here ####
-1 1 11 1
-2 1 12 1
-3 1 13 1
-7 1 17 1
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2,_col3
+              columns.types struct,struct,struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2,_col3
+                columns.types struct,struct,struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@outputtbl4
+POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
+SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@outputtbl4
+POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ]
+POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
+POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
+PREHOOK: query: SELECT * FROM outputTbl4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM outputTbl4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@outputtbl4
+#### A masked pattern was here ####
+1 1 11 1
+2 1 12 1
+3 1 13 1
+7 1 17 1
 8 1 18 1
 8 1 28 1
 PREHOOK: query: EXPLAIN EXTENDED
@@ -2104,7 +2514,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -2279,7 +2690,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,cnt
@@ -2300,6 +2711,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+            outputColumnNames: key1, key2, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2
+                      columns.types struct,struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -2310,7 +2748,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,cnt
@@ -2330,8 +2768,83 @@ STAGE PLANS:
               name: default.outputtbl3

   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, int, int
+          Table: default.outputtbl3
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2
+              columns.types struct,struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2
+                columns.types struct,struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3
 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
@@ -2375,7 +2888,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -2560,7 +3074,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -2581,6 +3095,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -2591,7 +3132,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -2611,8 +3152,83 @@ STAGE PLANS:
               name: default.outputtbl1

   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT cast(key + key as string), sum(cnt) from
@@ -2704,7 +3320,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -2725,6 +3341,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
           TableScan
             alias: t1
             Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -2756,7 +3388,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -2777,6 +3409,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
       Path -> Alias:
#### A masked pattern was here ####
       Path -> Partition:
@@ -2832,6 +3480,35 @@ STAGE PLANS:
             name: default.t1
       Truncated Path -> Alias:
         /t1 [null-subquery1:$hdt$_0-subquery1:t1, null-subquery2:$hdt$_0-subquery2:t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -2851,7 +3528,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -2871,8 +3548,14 @@ STAGE PLANS:
               name: default.outputtbl1

   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -2888,7 +3571,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -2918,7 +3601,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -2939,7 +3622,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -2975,7 +3658,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3005,7 +3688,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3026,7 +3709,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3335,7 +4018,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3356,6 +4039,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
           TableScan
             GatherStats: false
             Union
@@ -3375,7 +4074,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3396,6 +4095,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
       Path -> Alias:
#### A masked pattern was here ####
       Path -> Partition:
@@ -3474,6 +4189,35 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /t1 [null-subquery1:$hdt$_0-subquery1:t1]
#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-8
     Conditional Operator
@@ -3493,7 +4237,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3513,8 +4257,14 @@ STAGE PLANS:
               name: default.outputtbl1

   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true

   Stage: Stage-4
     Map Reduce
@@ -3530,7 +4280,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3560,7 +4310,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3581,7 +4331,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3617,7 +4367,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3647,7 +4397,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3668,7 +4418,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3753,7 +4503,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1

 STAGE PLANS:
   Stage: Stage-1
@@ -3885,7 +4636,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3906,6 +4657,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -3916,7 +4694,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -3936,8 +4714,83 @@ STAGE PLANS:
               name: default.outputtbl1

   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT subq1.key, subq1.cnt+subq2.cnt FROM
@@ -4355,7 +5208,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4
+  Stage-4 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-1
@@ -4394,7 +5248,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
                 SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 2
                 bucket_field_name key
@@ -4417,7 +5271,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
                 SORTBUCKETCOLSPREFIX TRUE
                 bucket_count 2
                 bucket_field_name key
@@ -4531,7 +5385,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -4552,6 +5406,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -4562,7 +5443,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,cnt
@@ -4578,12 +5459,87 @@ STAGE PLANS:
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 totalSize 20
#### A masked pattern was here ####
-              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              name: default.outputtbl1
-
-  Stage: Stage-3
-    Stats-Aggr Operator
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.outputtbl1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10003
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT key, count(1) FROM T2 GROUP BY key
@@ -4661,7 +5617,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -4682,6 +5638,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+            outputColumnNames: key1, key2, key3, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+                auto parallelism: false
       Path -> Alias:
#### A masked pattern was here ####
       Path -> Partition:
@@ -4691,7 +5663,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4714,7 +5686,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4737,6 +5709,35 @@ STAGE PLANS:
             name: default.t2
       Truncated Path -> Alias:
         /t2 [t2]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -4756,7 +5757,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -4776,8 +5777,14 @@ STAGE PLANS:
               name: default.outputtbl4

   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, key3, cnt
+          Column Types: int, int, string, int
+          Table: default.outputtbl4
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -4793,7 +5800,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -4823,7 +5830,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -4844,7 +5851,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -4880,7 +5887,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -4910,7 +5917,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -4931,7 +5938,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5067,6 +6074,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+            outputColumnNames: key1, key2, key3, key4, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(key4, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4
+              Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 2136 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+                auto parallelism: false
       Path -> Alias:
#### A masked pattern was here ####
       Path -> Partition:
@@ -5076,7 +6099,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -5099,7 +6122,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -5122,6 +6145,35 @@ STAGE PLANS:
             name: default.t2
       Truncated Path -> Alias:
         /t2 [t2]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4
+                  columns.types struct:struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -5161,8 +6213,14 @@ STAGE PLANS:
               name: default.outputtbl5

   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, key3, key4, cnt
+          Column Types: int, int, string, int, int
+          Table: default.outputtbl5
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -5430,7 +6488,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5451,6 +6509,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+            outputColumnNames: key1, key2, key3, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+                auto parallelism: false
       Path -> Alias:
#### A masked pattern was here ####
       Path -> Partition:
@@ -5460,7 +6534,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -5483,7 +6557,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -5506,6 +6580,35 @@ STAGE PLANS:
             name: default.t2
       Truncated Path -> Alias:
         /t2 [t2]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -5525,7 +6628,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5545,8 +6648,14 @@ STAGE PLANS:
               name: default.outputtbl4

   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, key3, cnt
+          Column Types: int, int, string, int
+          Table: default.outputtbl4
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -5562,7 +6671,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5592,7 +6701,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5613,7 +6722,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5649,7 +6758,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5679,7 +6788,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5700,7 +6809,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5821,7 +6930,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5842,6 +6951,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+            outputColumnNames: key1, key2, key3, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(key3, 'hll'), compute_stats(cnt, 'hll')
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+                auto parallelism: false
       Path -> Alias:
#### A masked pattern was here ####
       Path -> Partition:
@@ -5851,7 +6976,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -5874,7 +6999,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -5897,6 +7022,35 @@ STAGE PLANS:
             name: default.t2
       Truncated Path -> Alias:
         /t2 [t2]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false

   Stage: Stage-7
     Conditional Operator
@@ -5916,7 +7070,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5936,8 +7090,14 @@ STAGE PLANS:
               name: default.outputtbl4

   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key1, key2, key3, cnt
+          Column Types: int, int, string, int
+          Table: default.outputtbl4
+          Is Table Level Stats: true

   Stage: Stage-3
     Map Reduce
@@ -5953,7 +7113,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -5983,7 +7143,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -6004,7 +7164,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -6040,7 +7200,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -6070,7 +7230,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -6091,7 +7251,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key1,key2,key3,cnt
@@ -6187,9 +7347,11 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-7
+  Stage-5 depends on stages: Stage-3
+  Stage-6 depends on stages: Stage-1, Stage-5, Stage-7
   Stage-1 depends on stages: Stage-2
-  Stage-5 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-2
@@ -6237,6 +7399,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
+              outputColumnNames: key, val, cnt
+              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: true
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -6280,6 +7457,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int)
+              outputColumnNames: key, cnt
+              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: true
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Move Operator
@@ -6292,7 +7484,42 @@ STAGE PLANS:
               name: default.dest1

   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.dest1
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: final
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val, cnt
+          Column Types: int, string, int
+          Table: default.dest2

   Stage: Stage-1
     Move Operator
@@ -6304,8 +7531,27 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.dest2

-  Stage: Stage-5
-    Stats-Aggr Operator
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: final
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: true
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM T2
 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
@@ -6367,9 +7613,11 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-7
+  Stage-5 depends on stages: Stage-3
+  Stage-6 depends on stages: Stage-1, Stage-5, Stage-7
   Stage-1 depends on stages: Stage-2
-  Stage-5 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-2

 STAGE PLANS:
   Stage: Stage-2
@@ -6420,6 +7668,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
+              outputColumnNames: key, val, cnt
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll'), compute_stats(cnt, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: true
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
@@ -6463,6 +7726,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int)
+              outputColumnNames: key, cnt
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: 
true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -6475,7 +7753,42 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 Stage: Stage-1 Move Operator @@ -6487,8 +7800,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key diff --git ql/src/test/results/clientpositive/groupby_sort_test_1.q.out ql/src/test/results/clientpositive/groupby_sort_test_1.q.out index d06cd7cce0..ffe6515bbd 100644 --- ql/src/test/results/clientpositive/groupby_sort_test_1.q.out +++ ql/src/test/results/clientpositive/groupby_sort_test_1.q.out @@ -81,6 +81,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -102,7 +128,12 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/hll.q.out ql/src/test/results/clientpositive/hll.q.out index bb880b2d18..2e7fd280ee 100644 --- ql/src/test/results/clientpositive/hll.q.out +++ ql/src/test/results/clientpositive/hll.q.out @@ -58,7 +58,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key Column Types: int @@ -67,10 +68,12 @@ STAGE PLANS: PREHOOK: query: analyze table n compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@n +PREHOOK: Output: default@n #### A masked pattern was here #### POSTHOOK: query: analyze table n compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@n +POSTHOOK: Output: default@n #### A masked pattern was here #### PREHOOK: query: desc formatted n key PREHOOK: type: DESCTABLE @@ -142,7 +145,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key Column Types: int @@ -151,10 +155,12 @@ STAGE PLANS: PREHOOK: query: analyze table i compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@i +PREHOOK: Output: default@i #### A masked pattern was here #### POSTHOOK: query: analyze table i compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@i +POSTHOOK: Output: default@i #### A masked pattern was here #### PREHOOK: query: desc formatted i key PREHOOK: type: DESCTABLE @@ -194,10 +200,12 @@ POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, PREHOOK: query: analyze table i compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@i +PREHOOK: Output: default@i #### A masked pattern was here #### POSTHOOK: query: analyze table i compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@i +POSTHOOK: Output: default@i #### A masked pattern was here #### PREHOOK: query: desc formatted i key PREHOOK: type: DESCTABLE @@ -237,10 +245,12 @@ POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, PREHOOK: query: analyze table i compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@i +PREHOOK: Output: default@i #### A masked pattern was here #### POSTHOOK: query: analyze table i compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@i +POSTHOOK: Output: default@i #### A masked pattern was here #### 
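Note on the hll.q.out hunks above: they reflect two behavioral points of this patch — ANALYZE ... COMPUTE STATISTICS FOR COLUMNS now registers the analyzed table as a write entity (the added PREHOOK/POSTHOOK `Output: default@n` lines), and the plan's `Column Stats Work` stage is folded into a unified `Stats Work` stage whose column statistics come from the HLL-based `compute_stats` UDAF. A minimal HiveQL sketch of the same flow, using a hypothetical table `t` (not one of the test tables), might look like:

  CREATE TABLE t (key INT);
  INSERT INTO t VALUES (1), (2), (2);
  -- Explicit column-stats gathering; the table now also appears as a
  -- write (Output) entity in the pre/post execution hooks, as the
  -- updated q.out files show.
  ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS;
  -- The same aggregate the explain plans invoke internally; 'hll'
  -- selects the HyperLogLog NDV estimator seen in the Group By
  -- Operators throughout this diff.
  SELECT compute_stats(key, 'hll') FROM t;
  -- Column-level stats are then visible per column:
  DESCRIBE FORMATTED t key;

This is a sketch for orientation only; the authoritative behavior is the golden output itself.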
PREHOOK: query: desc formatted i key PREHOOK: type: DESCTABLE @@ -306,10 +316,12 @@ POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__5)values__tmp__table__ PREHOOK: query: analyze table i compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@i +PREHOOK: Output: default@i #### A masked pattern was here #### POSTHOOK: query: analyze table i compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@i +POSTHOOK: Output: default@i #### A masked pattern was here #### PREHOOK: query: desc formatted i key PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out index c4b9dc4c62..a515847ed4 100644 --- ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out +++ ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out @@ -56,6 +56,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.implicit_cast_during_insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: c1, c2, p1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + keys: p1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -70,7 +91,12 @@ STAGE PLANS: name: default.implicit_cast_during_insert Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.implicit_cast_during_insert PREHOOK: query: insert overwrite table implicit_cast_during_insert partition (p1) select key, value, key key1 from (select * from src where key in (0,1)) q diff --git ql/src/test/results/clientpositive/index_auto_update.q.out ql/src/test/results/clientpositive/index_auto_update.q.out index e7bc0690ad..e48b657434 100644 --- ql/src/test/results/clientpositive/index_auto_update.q.out +++ ql/src/test/results/clientpositive/index_auto_update.q.out @@ -45,10 +45,10 @@ STAGE DEPENDENCIES: Stage-0 depends on stages: Stage-9, Stage-8, Stage-11 Stage-2 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1, Stage-4, Stage-5 Stage-4 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-0, Stage-4, Stage-5 Stage-5 depends on stages: Stage-2 - Stage-7 depends on stages: Stage-0 Stage-8 Stage-10 Stage-11 depends on stages: Stage-10 @@ -72,6 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp + Select Operator + expressions: _col0 
(type: string), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-12 Conditional Operator @@ -135,14 +161,20 @@ STAGE PLANS: name: default.default__temp_temp_index__ Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-4 - Stage: Stage-5 - Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: string, string + Table: default.temp + + Stage: Stage-5 Stage: Stage-8 Map Reduce diff --git ql/src/test/results/clientpositive/infer_bucket_sort.q.out ql/src/test/results/clientpositive/infer_bucket_sort.q.out index f09f01d971..ae2435c9fd 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -142,7 +142,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -193,7 +193,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -244,7 +244,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 4970 @@ -295,7 +295,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -346,7 +346,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -397,7 +397,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -448,7 +448,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -499,7 +499,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -550,7 +550,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -601,7 +601,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -652,7 +652,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -703,7 +703,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -754,7 +754,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -805,7 +805,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -856,7 +856,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -907,7 +907,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -958,7 +958,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 2964 @@ -1009,7 +1009,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 5 rawDataSize 19 @@ -1060,7 +1060,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1111,7 +1111,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1162,7 +1162,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1213,7 +1213,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1264,7 +1264,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 3582 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out index d62d0b8d2f..78528b65f5 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out @@ -58,7 +58,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 @@ -98,7 +98,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 
@@ -161,7 +161,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -201,7 +201,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -266,7 +266,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 155 rawDataSize 586 @@ -306,7 +306,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 154 rawDataSize 591 @@ -427,10 +427,11 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -474,6 +475,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_table + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -498,7 +515,12 @@ STAGE PLANS: name: default.test_table Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table Stage: Stage-3 Merge File Operator @@ -520,6 +542,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + 
mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key, value, IF (key % 100 == 0, '11', '12') FROM (SELECT key, COUNT(*) AS value FROM srcpart @@ -568,7 +619,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 4 rawDataSize 14 @@ -608,7 +659,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 305 rawDataSize 1163 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index a448ef312c..065f963315 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -68,6 +69,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -82,7 +99,41 @@ STAGE PLANS: name: default.test_table_out Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: 
_col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: SELECT key, value, count(1) FROM src GROUP BY ROLLUP (key, value) PREHOOK: type: QUERY @@ -747,7 +798,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 6309 @@ -1429,7 +1480,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 7547 @@ -1455,7 +1506,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1500,6 +1552,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1514,7 +1582,41 @@ STAGE PLANS: name: default.test_table_out Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map 
Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE @@ -1552,7 +1654,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 9954 @@ -1607,7 +1709,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 11810 @@ -1633,7 +1735,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1678,6 +1781,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(agg, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1692,7 +1811,41 @@ STAGE PLANS: name: default.test_table_out Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats 
Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) @@ -1730,7 +1883,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 6054 @@ -1785,7 +1938,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 7290 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out index 98a2f5f3c0..c8d1492840 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out @@ -46,7 +46,7 @@ Database: default Table: list_bucketing_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 309 rawDataSize 1482 @@ -116,7 +116,7 @@ Database: default Table: list_bucketing_table2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out index 32edd73030..30a3eb8035 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out +++ 
ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out @@ -91,6 +91,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -114,7 +148,12 @@ STAGE PLANS: name: default.test_table_out Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out Stage: Stage-3 Map Reduce @@ -180,7 +219,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -217,7 +256,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -281,6 +321,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -295,7 +351,41 @@ STAGE PLANS: name: default.test_table_out Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT a.key, a.value FROM ( @@ -343,7 +433,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 0 rawDataSize 0 @@ -403,6 +493,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -426,7 +544,12 @@ STAGE PLANS: name: default.test_table_out Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + 
Column Types: string, string + Table: default.test_table_out Stage: Stage-4 Map Reduce @@ -494,7 +617,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -522,7 +645,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -567,6 +691,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -581,7 +719,37 @@ STAGE PLANS: name: default.test_table_out Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key @@ -621,7 +789,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2728 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out index bf77d4ce5a..bf4daf29eb 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out index 59b20fe4da..00797b2471 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out @@ -85,7 +85,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -181,7 +181,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2718 @@ -277,7 +277,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -373,7 +373,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2690 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out index 0c61fe0212..048d09814c 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out @@ -25,7 +25,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -56,6 +57,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-04-08' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -71,7 +88,41 @@ STAGE PLANS: name: default.test_table Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-04-08' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key2, value, cast(hr as int) FROM @@ -123,7 +174,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 670 rawDataSize 5888 @@ -163,7 +214,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 330 rawDataSize 2924 diff --git ql/src/test/results/clientpositive/innerjoin.q.out ql/src/test/results/clientpositive/innerjoin.q.out index 99b3d856eb..138eb9390c 100644 --- ql/src/test/results/clientpositive/innerjoin.q.out +++ ql/src/test/results/clientpositive/innerjoin.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -87,7 +103,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/input11.q.out ql/src/test/results/clientpositive/input11.q.out index bb22ee86df..bb59b4f992 100644 --- ql/src/test/results/clientpositive/input11.q.out +++ ql/src/test/results/clientpositive/input11.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,7 +93,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input11_limit.q.out ql/src/test/results/clientpositive/input11_limit.q.out index 597554e02f..8fb7b630fc 100644 --- ql/src/test/results/clientpositive/input11_limit.q.out +++ 
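[editor's note] Throughout these plans the old one-line "Stats-Aggr Operator" stage is re-rendered as a "Stats Work" stage carrying a "Basic Stats Work:" section and, when column statistics are autogathered, a "Column Stats Desc:" naming the columns, their types, and the destination table. A hypothetical value object mirroring what those rendered lines convey (field and class names are illustrative, not Hive's actual ColumnStatsDesc API):

    import java.util.List;

    // Hypothetical mirror of the new "Column Stats Desc:" plan section;
    // names chosen to match the rendered text, not Hive internals.
    class ColumnStatsDescSketch {
      final String table;          // e.g. "default.dest1"
      final List<String> columns;  // e.g. ["key", "value"]
      final List<String> types;    // e.g. ["int", "string"]

      ColumnStatsDescSketch(String table, List<String> columns, List<String> types) {
        this.table = table;
        this.columns = columns;
        this.types = types;
      }

      String render() {
        return "Column Stats Desc:\n"
             + "  Columns: " + String.join(", ", columns) + "\n"
             + "  Column Types: " + String.join(", ", types) + "\n"
             + "  Table: " + table;
      }
    }

Note also that the Stats Work stage now depends on both the Move stage and the new stats map-reduce stage, which is why every STAGE DEPENDENCIES header in these files gains an extra "depends on stages:" entry. [end note]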
ql/src/test/results/clientpositive/input11_limit.q.out @@ -73,7 +73,8 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 diff --git ql/src/test/results/clientpositive/input12.q.out ql/src/test/results/clientpositive/input12.q.out index 3bb765cf78..d908b937c1 100644 --- ql/src/test/results/clientpositive/input12.q.out +++ ql/src/test/results/clientpositive/input12.q.out @@ -39,24 +39,16 @@ STAGE DEPENDENCIES: Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-11, Stage-13 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 - Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 + Stage-1 depends on stages: Stage-3 + Stage-10 depends on stages: Stage-1, Stage-11, Stage-13 + Stage-11 depends on stages: Stage-3 + Stage-12 depends on stages: Stage-2, Stage-11, Stage-13 + Stage-2 depends on stages: Stage-3 + Stage-13 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -80,6 +72,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -95,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (key >= 200) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE 
Column stats: NONE @@ -110,6 +130,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -131,7 +180,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-5 Map Reduce @@ -163,15 +217,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -183,46 +228,42 @@ STAGE PLANS: name: default.dest2 Stage: Stage-10 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 
Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-12 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 Stage: Stage-2 Move Operator @@ -237,38 +278,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-16 - Stats-Aggr Operator - - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-19 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 diff --git ql/src/test/results/clientpositive/input13.q.out ql/src/test/results/clientpositive/input13.q.out index 91ff23429b..a03af9c141 100644 --- ql/src/test/results/clientpositive/input13.q.out +++ ql/src/test/results/clientpositive/input13.q.out @@ -41,30 +41,17 @@ STAGE DEPENDENCIES: Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 Stage-7 Stage-0 depends on stages: Stage-7, Stage-6, Stage-9 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-12, Stage-14, Stage-3 Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 - Stage-16 depends on stages: Stage-4 , consists of Stage-13, Stage-12, Stage-14 - Stage-13 - Stage-1 depends on stages: Stage-13, Stage-12, Stage-15 - Stage-11 depends on stages: Stage-1 - Stage-12 - Stage-14 - Stage-15 depends on stages: Stage-14 - Stage-22 depends on stages: Stage-4 , consists of Stage-19, Stage-18, Stage-20 - Stage-19 - Stage-2 depends on stages: 
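[editor's note] The added operator branches follow one pattern: each map task folds its rows into a partial per-column summary ("mode: hash"), and a reducer merges the partials ("mode: mergepartial"). Hive's real compute_stats structs also carry NDV sketches and length/width figures; the toy accumulator below tracks only row count, null count, and an exact distinct set so the merge semantics stay obvious (a simplified stand-in, not Hive's UDAF):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    class PartialColumnStats {
      long rows, nulls;
      final Set<String> distinct = new HashSet<>();

      void iterate(String value) {            // map side, "mode: hash"
        rows++;
        if (value == null) nulls++;
        else distinct.add(value);
      }

      void merge(PartialColumnStats other) {  // reduce side, "mode: mergepartial"
        rows += other.rows;
        nulls += other.nulls;
        distinct.addAll(other.distinct);
      }

      static PartialColumnStats of(List<String> split) {
        PartialColumnStats p = new PartialColumnStats();
        split.forEach(p::iterate);
        return p;
      }

      public static void main(String[] args) {
        PartialColumnStats merged = of(Arrays.asList("86", "311", null)); // map task 1
        merged.merge(of(Arrays.asList("311", "27")));                     // map task 2
        System.out.println(merged.rows + " rows, " + merged.nulls
            + " nulls, ndv=" + merged.distinct.size());   // 5 rows, 1 nulls, ndv=3
      }
    }

This is why the unpartitioned plans shuffle with an empty "sort order:" into a single reducer: all partials must meet in one place to be merged. [end note]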
Stage-19, Stage-18, Stage-21 - Stage-17 depends on stages: Stage-2 - Stage-18 - Stage-20 - Stage-21 depends on stages: Stage-20 - Stage-27 depends on stages: Stage-4 , consists of Stage-24, Stage-23, Stage-25 - Stage-24 - Stage-3 depends on stages: Stage-24, Stage-23, Stage-26 - Stage-23 - Stage-25 - Stage-26 depends on stages: Stage-25 + Stage-1 depends on stages: Stage-4 + Stage-11 depends on stages: Stage-1, Stage-12, Stage-14, Stage-3 + Stage-12 depends on stages: Stage-4 + Stage-13 depends on stages: Stage-2, Stage-12, Stage-14, Stage-3 + Stage-2 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 @@ -88,6 +75,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -103,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((key >= 200) and (key < 300)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -118,6 +133,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (key >= 300) (type: boolean) 
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +163,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -153,7 +197,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-6 Map Reduce @@ -185,15 +234,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-16 - Conditional Operator - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -205,46 +245,42 @@ STAGE PLANS: name: default.dest2 Stage: Stage-11 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 Stage: Stage-12 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-15 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-22 - Conditional Operator + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-19 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-13 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 Stage: Stage-2 Move Operator @@ -259,47 +295,34 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-17 - Stats-Aggr Operator - - Stage: Stage-18 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-21 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-27 - Conditional Operator - - Stage: Stage-24 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### Stage: Stage-3 Move Operator @@ -307,34 +330,6 @@ STAGE PLANS: hdfs directory: true destination: target/warehouse/dest4.out - Stage: Stage-23 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-25 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-26 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 diff --git ql/src/test/results/clientpositive/input14.q.out ql/src/test/results/clientpositive/input14.q.out index af04a9896d..516c68467c 100644 --- ql/src/test/results/clientpositive/input14.q.out +++ ql/src/test/results/clientpositive/input14.q.out @@ -27,7 +27,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on 
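[editor's note] For partitioned targets (the input12/input13 plans here, and the num_buckets one earlier), the stats-merge stage instead shuffles on the static partition spec — the literals '2008-04-08' and '12' above — so partials from different partitions are never merged together and each partition gets its own column-stats row. A minimal sketch of that grouping, with long[]{rows, nulls} standing in for the compute_stats partial struct (illustrative only):

    import java.util.HashMap;
    import java.util.Map;

    // Why the shuffle keys on the partition spec: merge per partition,
    // never across partitions.
    class PerPartitionMerge {
      final Map<String, long[]> byPartition = new HashMap<>();

      void accept(String partitionSpec, long rows, long nulls) {
        long[] acc = byPartition.computeIfAbsent(partitionSpec, k -> new long[2]);
        acc[0] += rows;
        acc[1] += nulls;
      }
    }

[end note]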
stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -81,7 +97,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM src diff --git ql/src/test/results/clientpositive/input14_limit.q.out ql/src/test/results/clientpositive/input14_limit.q.out index 9870ad53b4..ac061d1508 100644 --- ql/src/test/results/clientpositive/input14_limit.q.out +++ ql/src/test/results/clientpositive/input14_limit.q.out @@ -29,6 +29,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,7 +54,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -79,7 +79,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -104,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -116,7 +130,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM src diff --git ql/src/test/results/clientpositive/input17.q.out ql/src/test/results/clientpositive/input17.q.out index 057a92d163..f1ba03abb9 100644 --- ql/src/test/results/clientpositive/input17.q.out +++ ql/src/test/results/clientpositive/input17.q.out @@ -27,7 +27,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,7 +94,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + 
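[editor's note] Two variants of the second compute_stats argument appear in these outputs: most rewritten plans pass 'hll', selecting the HyperLogLog NDV estimator, while a few (input14_limit just above, and the accumulo plan earlier) still print the older numeric form, which — as far as I can tell — is the bit-vector count for the earlier Flajolet–Martin-style sketch. A toy HyperLogLog, purely to make the 'hll' choice concrete; Hive's estimator is considerably more careful (bias correction, sparse encoding, serialized register banks):

    // Shape-of-the-idea sketch only, not Hive's implementation.
    class TinyHll {
      static final int P = 12;              // 2^12 = 4096 registers
      static final int M = 1 << P;
      final byte[] registers = new byte[M];

      static long mix64(long z) {           // splitmix64 finalizer as a cheap hash
        z = (z ^ (z >>> 30)) * 0xbf58476d1ce4e5b9L;
        z = (z ^ (z >>> 27)) * 0x94d049bb133111ebL;
        return z ^ (z >>> 31);
      }

      void add(String value) {
        long h = mix64(value.hashCode());                 // toy: 32-bit seed, mixed
        int idx = (int) (h >>> (64 - P));                 // top P bits pick a register
        int rank = Long.numberOfLeadingZeros(h << P) + 1; // leading zeros of the rest
        if (rank > registers[idx]) registers[idx] = (byte) Math.min(rank, 64 - P + 1);
      }

      double estimate() {                   // raw estimate, no range corrections
        double sum = 0;
        for (byte r : registers) sum += Math.pow(2.0, -r);
        double alpha = 0.7213 / (1 + 1.079 / M);
        return alpha * M * M / sum;
      }

      public static void main(String[] args) {
        TinyHll hll = new TinyHll();
        for (int i = 0; i < 100_000; i++) hll.add("row-" + (i % 12345));
        System.out.printf("estimated ndv ~ %.0f (true 12345)%n", hll.estimate());
      }
    }

Either way the sketch is mergeable, which is what lets the hash/mergepartial split above work per column. [end note]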
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM src_thrift diff --git ql/src/test/results/clientpositive/input18.q.out ql/src/test/results/clientpositive/input18.q.out index b341510502..1be623ed9d 100644 --- ql/src/test/results/clientpositive/input18.q.out +++ ql/src/test/results/clientpositive/input18.q.out @@ -27,7 +27,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -81,7 +97,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM src diff --git ql/src/test/results/clientpositive/input1_limit.q.out ql/src/test/results/clientpositive/input1_limit.q.out index 0ca1552ef2..1e898ea535 100644 --- ql/src/test/results/clientpositive/input1_limit.q.out +++ ql/src/test/results/clientpositive/input1_limit.q.out @@ -27,10 +27,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: 
Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -52,7 +54,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Filter Operator predicate: (key < 100) (type: boolean) @@ -90,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,7 +118,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-4 Map Reduce @@ -110,8 +131,37 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -133,6 +183,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -144,8 +209,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 diff --git ql/src/test/results/clientpositive/input20.q.out ql/src/test/results/clientpositive/input20.q.out index 9587445f82..b931f9b368 100644 --- ql/src/test/results/clientpositive/input20.q.out +++ ql/src/test/results/clientpositive/input20.q.out @@ -33,7 +33,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -82,6 +83,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -94,7 +110,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM src diff --git ql/src/test/results/clientpositive/input30.q.out ql/src/test/results/clientpositive/input30.q.out index 478cea1861..84e4ae4786 100644 --- ql/src/test/results/clientpositive/input30.q.out +++ ql/src/test/results/clientpositive/input30.q.out @@ -64,6 +64,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest30 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -76,7 +96,12 @@ STAGE PLANS: name: default.tst_dest30 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest30 PREHOOK: query: insert overwrite table dest30 select count(1) from src @@ -147,6 +172,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest30 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -159,7 +204,12 @@ STAGE PLANS: name: default.dest30 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a + Column Types: int + Table: default.dest30 PREHOOK: query: insert overwrite table dest30 select count(1) from src diff --git ql/src/test/results/clientpositive/input31.q.out ql/src/test/results/clientpositive/input31.q.out index ea2c8f95b3..3ba7c5f87f 100644 --- ql/src/test/results/clientpositive/input31.q.out +++ ql/src/test/results/clientpositive/input31.q.out @@ -66,6 +66,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
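[editor's note] The input30 plans just above (and input31/input32 following) differ from the rest: the SELECT count(1) feeding the FileSink apparently already executes in a single task, so compute_stats runs with "mode: complete" — iterate and terminate in one operator — and no extra stats map-reduce stage or shuffle is added. A toy illustration of complete-mode evaluation (illustrative only, under that single-task assumption):

    import java.util.Arrays;
    import java.util.List;
    import java.util.Objects;

    class CompleteModeStats {
      // One pass over the whole column: no partials, no merge.
      static String evaluate(List<Integer> column) {
        long rows = column.size();
        long nulls = column.stream().filter(Objects::isNull).count();
        long ndv = column.stream().filter(Objects::nonNull).distinct().count();
        return "rows=" + rows + " nulls=" + nulls + " ndv=" + ndv;
      }

      public static void main(String[] args) {
        System.out.println(evaluate(Arrays.asList(500, null, 500)));
        // rows=3 nulls=1 ndv=1
      }
    }

[end note]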
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest31 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -78,7 +98,12 @@ STAGE PLANS: name: default.tst_dest31 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest31 PREHOOK: query: insert overwrite table dest31 select count(1) from srcbucket diff --git ql/src/test/results/clientpositive/input32.q.out ql/src/test/results/clientpositive/input32.q.out index d3426a8dfa..4da8c75e2c 100644 --- ql/src/test/results/clientpositive/input32.q.out +++ ql/src/test/results/clientpositive/input32.q.out @@ -63,6 +63,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest32 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -75,7 +95,12 @@ STAGE PLANS: name: default.tst_dest32 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest32 PREHOOK: query: insert overwrite table dest32 select count(1) from srcbucket diff --git ql/src/test/results/clientpositive/input33.q.out ql/src/test/results/clientpositive/input33.q.out index 4be5fc5d61..fe05db2c2c 100644 --- ql/src/test/results/clientpositive/input33.q.out +++ ql/src/test/results/clientpositive/input33.q.out @@ -33,7 +33,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -82,6 +83,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select 
Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -94,7 +110,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM src diff --git ql/src/test/results/clientpositive/input34.q.out ql/src/test/results/clientpositive/input34.q.out index 72f66c3f17..b6e0c054dd 100644 --- ql/src/test/results/clientpositive/input34.q.out +++ ql/src/test/results/clientpositive/input34.q.out @@ -64,6 +64,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -85,7 +111,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + 
Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input35.q.out ql/src/test/results/clientpositive/input35.q.out index 8b869918e1..7e89fb477f 100644 --- ql/src/test/results/clientpositive/input35.q.out +++ ql/src/test/results/clientpositive/input35.q.out @@ -64,6 +64,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -85,7 +111,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input36.q.out ql/src/test/results/clientpositive/input36.q.out index 76921ad6b8..64e34cc539 100644 --- ql/src/test/results/clientpositive/input36.q.out +++ ql/src/test/results/clientpositive/input36.q.out @@ -64,6 +64,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -85,7 +111,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input38.q.out ql/src/test/results/clientpositive/input38.q.out index 0c4e81d0ef..cd459078a2 100644 --- ql/src/test/results/clientpositive/input38.q.out +++ ql/src/test/results/clientpositive/input38.q.out @@ -58,6 +58,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -79,7 +105,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input4.q.out ql/src/test/results/clientpositive/input4.q.out index 69843183a5..151aa0c4c1 100644 --- ql/src/test/results/clientpositive/input4.q.out +++ ql/src/test/results/clientpositive/input4.q.out @@ -28,7 +28,8 @@ STAGE PLANS: name: default.input4 Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE INPUT4 PREHOOK: type: LOAD diff --git ql/src/test/results/clientpositive/input5.q.out ql/src/test/results/clientpositive/input5.q.out index a39952878d..ceb28c0bc6 100644 --- ql/src/test/results/clientpositive/input5.q.out +++ ql/src/test/results/clientpositive/input5.q.out @@ -27,7 +27,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,7 +94,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM src_thrift diff --git ql/src/test/results/clientpositive/input6.q.out ql/src/test/results/clientpositive/input6.q.out index 3d1a815cf6..f8183cdea6 100644 --- ql/src/test/results/clientpositive/input6.q.out +++ ql/src/test/results/clientpositive/input6.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,7 +93,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, 
string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input7.q.out ql/src/test/results/clientpositive/input7.q.out index 0545b1f774..0b7279a664 100644 --- ql/src/test/results/clientpositive/input7.q.out +++ ql/src/test/results/clientpositive/input7.q.out @@ -43,6 +43,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: int) + outputColumnNames: c1, c2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -64,7 +90,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2 + Column Types: double, int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input8.q.out ql/src/test/results/clientpositive/input8.q.out index d76fc2bc02..47c616196a 100644 --- ql/src/test/results/clientpositive/input8.q.out +++ ql/src/test/results/clientpositive/input8.q.out @@ -43,6 +43,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -64,7 +90,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input9.q.out ql/src/test/results/clientpositive/input9.q.out index af752e0af9..32ead4f837 100644 --- ql/src/test/results/clientpositive/input9.q.out +++ ql/src/test/results/clientpositive/input9.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, key + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 'hll'), compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,7 +93,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input_columnarserde.q.out ql/src/test/results/clientpositive/input_columnarserde.q.out index afa0e28d17..95d0e6e7ec 100644 --- ql/src/test/results/clientpositive/input_columnarserde.q.out +++ ql/src/test/results/clientpositive/input_columnarserde.q.out @@ -70,7 +70,8 @@ STAGE PLANS: name: default.input_columnarserde Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM src_thrift INSERT OVERWRITE TABLE input_columnarserde SELECT src_thrift.lint, src_thrift.lstring, src_thrift.mstringstring, src_thrift.aint, src_thrift.astring DISTRIBUTE BY 1 diff --git ql/src/test/results/clientpositive/input_dynamicserde.q.out ql/src/test/results/clientpositive/input_dynamicserde.q.out index 30493be357..a117873164 100644 --- ql/src/test/results/clientpositive/input_dynamicserde.q.out +++ ql/src/test/results/clientpositive/input_dynamicserde.q.out @@ -76,7 +76,8 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input_lazyserde.q.out ql/src/test/results/clientpositive/input_lazyserde.q.out index 64dc6c14a4..473b5fe4e6 100644 --- 
ql/src/test/results/clientpositive/input_lazyserde.q.out +++ ql/src/test/results/clientpositive/input_lazyserde.q.out @@ -76,7 +76,8 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM src_thrift INSERT OVERWRITE TABLE dest1 SELECT src_thrift.lint, src_thrift.lstring, src_thrift.mstringstring, src_thrift.aint, src_thrift.astring DISTRIBUTE BY 1 diff --git ql/src/test/results/clientpositive/input_part1.q.out ql/src/test/results/clientpositive/input_part1.q.out index a68544671f..31ce924066 100644 --- ql/src/test/results/clientpositive/input_part1.q.out +++ ql/src/test/results/clientpositive/input_part1.q.out @@ -72,6 +72,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -125,6 +141,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -164,8 +209,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input_part10.q.out ql/src/test/results/clientpositive/input_part10.q.out index 4b552badc6..daa9527ee0 100644 --- 
ql/src/test/results/clientpositive/input_part10.q.out +++ ql/src/test/results/clientpositive/input_part10.q.out @@ -29,7 +29,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +49,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: int) Reduce Operator Tree: Select Operator @@ -70,6 +70,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_special + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008 04 08' (type: string), '10:11:12=455' (type: string) + outputColumnNames: a, b, ds, ts + Statistics: Num rows: 1 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + keys: ds (type: string), ts (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -85,7 +101,41 @@ STAGE PLANS: name: default.part_special Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: string, string + Table: default.part_special + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1070 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455') SELECT 1, 2 FROM src LIMIT 1 diff --git ql/src/test/results/clientpositive/input_part2.q.out ql/src/test/results/clientpositive/input_part2.q.out index d5524feabc..496185f610 100644 --- ql/src/test/results/clientpositive/input_part2.q.out +++ 
ql/src/test/results/clientpositive/input_part2.q.out @@ -29,17 +29,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -89,6 +85,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-08' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) @@ -129,6 +141,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -231,6 +270,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -270,8 +338,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -453,15 +527,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -491,188 +556,83 @@ STAGE PLANS: name: default.dest2 Stage: Stage-9 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest2 + Is Table Level Stats: true Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here 
#### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","hr":"true","key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + MultiFileSpray: false PREHOOK: query: FROM srcpart INSERT OVERWRITE TABLE dest1 SELECT srcpart.key, srcpart.value, srcpart.hr, srcpart.ds WHERE srcpart.key < 100 and srcpart.ds = '2008-04-08' and srcpart.hr = '12' diff --git ql/src/test/results/clientpositive/input_part5.q.out ql/src/test/results/clientpositive/input_part5.q.out index c6ae2fd58d..33ecd59709 100644 --- ql/src/test/results/clientpositive/input_part5.q.out +++ ql/src/test/results/clientpositive/input_part5.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), 
_col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,7 +93,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input_testsequencefile.q.out ql/src/test/results/clientpositive/input_testsequencefile.q.out index 60aaf83df5..847d045d59 100644 --- ql/src/test/results/clientpositive/input_testsequencefile.q.out +++ ql/src/test/results/clientpositive/input_testsequencefile.q.out @@ -43,6 +43,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4_sequencefile + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -64,7 +90,12 @@ STAGE PLANS: name: default.dest4_sequencefile Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats 
Desc: + Columns: key, value + Column Types: int, string + Table: default.dest4_sequencefile Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input_testxpath.q.out ql/src/test/results/clientpositive/input_testxpath.q.out index e07628aaea..ddda55eeba 100644 --- ql/src/test/results/clientpositive/input_testxpath.q.out +++ ql/src/test/results/clientpositive/input_testxpath.q.out @@ -43,6 +43,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, mapvalue + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(mapvalue, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -64,7 +90,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, mapvalue + Column Types: int, string, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/input_testxpath2.q.out ql/src/test/results/clientpositive/input_testxpath2.q.out index a0baccf925..5c40743fe7 100644 --- ql/src/test/results/clientpositive/input_testxpath2.q.out +++ ql/src/test/results/clientpositive/input_testxpath2.q.out @@ -46,6 +46,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: lint_size, lintstring_size, mstringstring_size + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(lint_size, 'hll'), compute_stats(lintstring_size, 'hll'), compute_stats(mstringstring_size, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: 
mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,7 +93,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: lint_size, lintstring_size, mstringstring_size + Column Types: int, int, int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insert1.q.out ql/src/test/results/clientpositive/insert1.q.out index 39525787c9..a14db208b6 100644 --- ql/src/test/results/clientpositive/insert1.q.out +++ ql/src/test/results/clientpositive/insert1.q.out @@ -60,6 +60,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -81,7 +107,12 @@ STAGE PLANS: name: default.insert1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-3 Map Reduce @@ -149,6 +180,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -170,7 +227,12 @@ STAGE PLANS: name: default.insert1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-3 Map Reduce @@ -252,6 +314,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -273,7 +361,12 @@ STAGE PLANS: name: x.insert1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 Stage: Stage-3 Map Reduce @@ -341,6 +434,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -362,7 +481,12 @@ STAGE PLANS: name: default.insert1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-3 Map Reduce @@ -409,17 +533,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -443,6 +563,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -458,6 +591,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -479,7 +640,12 @@ STAGE PLANS: name: default.insert1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-4 Map Reduce @@ -511,15 +677,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -531,37 +688,34 @@ STAGE PLANS: name: x.insert1 Stage: Stage-9 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 Stage: Stage-10 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: CREATE DATABASE db2 PREHOOK: type: CREATEDATABASE diff --git ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out index 49c1269cc1..b17bc11f89 100644 --- ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out +++ ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out @@ -41,7 +41,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -58,7 +59,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -75,6 +75,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select 
Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string), '11' (type: string) + outputColumnNames: one, two, ds, hr + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -90,7 +106,41 @@ STAGE PLANS: name: default.destintable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE destinTable PARTITION (ds='2011-11-11', hr='11') if not exists SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 @@ -174,7 +224,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,7 +242,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -208,6 +258,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: one, two + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -220,7 +285,34 @@ STAGE PLANS: name: default.destintable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE destinTable SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out index b5f28d289a..77913f0a5e 100644 --- ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out +++ ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out @@ -52,7 +52,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,7 +70,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -86,6 +86,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,7 +116,41 @@ STAGE PLANS: name: db2.destintable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 @@ -140,7 +190,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -157,7 +208,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -174,6 +224,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 'hll'), compute_stats(two, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -188,7 +254,41 @@ STAGE PLANS: name: db2.destintable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + 
key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 diff --git ql/src/test/results/clientpositive/insert_into1.q.out ql/src/test/results/clientpositive/insert_into1.q.out index da863a7185..404d9159fd 100644 --- ql/src/test/results/clientpositive/insert_into1.q.out +++ ql/src/test/results/clientpositive/insert_into1.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -34,7 +35,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -56,6 +56,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,7 +83,34 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ -125,7 +167,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -142,7 +185,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -164,6 +206,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -176,7 +233,34 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ -233,7 +317,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -250,7 +335,6 @@ STAGE PLANS: key expressions: _col0 (type: 
string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -272,6 +356,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -284,7 +383,34 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY @@ -368,6 +494,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -389,7 +541,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 Stage: Stage-3 Map Reduce @@ -465,6 +622,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -486,7 +669,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insert_into2.q.out ql/src/test/results/clientpositive/insert_into2.q.out index 46fab7b416..efffd5372f 100644 --- ql/src/test/results/clientpositive/insert_into2.q.out +++ ql/src/test/results/clientpositive/insert_into2.q.out @@ -21,7 +21,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -38,7 +39,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -60,6 +60,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -74,7 +90,41 @@ STAGE PLANS: name: default.insert_into2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY @@ -170,7 +220,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -187,7 +238,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -209,6 +259,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -223,7 +289,41 @@ STAGE PLANS: name: default.insert_into2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + 
Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 @@ -288,7 +388,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -305,7 +406,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -327,6 +427,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -341,7 +457,41 @@ STAGE PLANS: name: default.insert_into2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) 
+ mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 diff --git ql/src/test/results/clientpositive/insert_into3.q.out ql/src/test/results/clientpositive/insert_into3.q.out index ae7523b217..77fca861ab 100644 --- ql/src/test/results/clientpositive/insert_into3.q.out +++ ql/src/test/results/clientpositive/insert_into3.q.out @@ -31,10 +31,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -51,7 +53,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -82,6 +83,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -94,17 +110,51 @@ STAGE PLANS: name: default.insert_into3a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+ compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -125,6 +175,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -136,8 +201,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT INTO TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50 INSERT INTO TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100 @@ -190,10 +274,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -212,7 +298,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: 
COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Select Operator expressions: key (type: string), value (type: string) @@ -247,6 +332,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -259,7 +359,12 @@ STAGE PLANS: name: default.insert_into3a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a Stage: Stage-4 Map Reduce @@ -267,8 +372,37 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -290,6 +424,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -301,8 +450,27 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE insert_into3a SELECT * LIMIT 10 INSERT INTO TABLE insert_into3b SELECT * LIMIT 10 diff --git ql/src/test/results/clientpositive/insert_into4.q.out ql/src/test/results/clientpositive/insert_into4.q.out index bb4e5571a6..927d087849 100644 --- ql/src/test/results/clientpositive/insert_into4.q.out +++ ql/src/test/results/clientpositive/insert_into4.q.out @@ -29,7 +29,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +49,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -70,6 +70,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -82,7 +97,34 @@ STAGE PLANS: name: default.insert_into4a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY @@ -114,7 +156,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -133,7 +176,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -155,6 +197,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -167,7 +224,34 @@ STAGE PLANS: name: default.insert_into4a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY @@ -225,6 +309,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 
864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -246,7 +356,12 @@ STAGE PLANS: name: default.insert_into4b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4b Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insert_into5.q.out ql/src/test/results/clientpositive/insert_into5.q.out index 7b471f4433..66dd50276c 100644 --- ql/src/test/results/clientpositive/insert_into5.q.out +++ ql/src/test/results/clientpositive/insert_into5.q.out @@ -29,7 +29,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +49,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -66,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,7 +93,34 @@ STAGE PLANS: name: default.insert_into5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column 
stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into5a SELECT 1, 'one' FROM src LIMIT 10 PREHOOK: type: QUERY @@ -136,6 +178,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -157,7 +225,12 @@ STAGE PLANS: name: default.insert_into5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a Stage: Stage-3 Map Reduce @@ -247,6 +320,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -270,7 +377,12 @@ STAGE PLANS: name: default.insert_into5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5b Stage: Stage-3 Map Reduce @@ -362,6 +474,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -385,7 +531,12 @@ STAGE PLANS: name: default.insert_into5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5b Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insert_into6.q.out ql/src/test/results/clientpositive/insert_into6.q.out index d93a167a74..c0fc7bd01b 100644 --- ql/src/test/results/clientpositive/insert_into6.q.out +++ ql/src/test/results/clientpositive/insert_into6.q.out @@ -31,7 +31,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -50,7 +51,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select 
Operator @@ -72,6 +72,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -86,7 +102,41 @@ STAGE PLANS: name: default.insert_into6a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE insert_into6a PARTITION (ds='1') SELECT * FROM src LIMIT 150 PREHOOK: type: QUERY @@ -160,6 +210,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -183,7 +267,12 @@ STAGE PLANS: name: default.insert_into6b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6b Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out index 6df425fcec..eb4c506e69 100644 --- ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out +++ ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out @@ -364,7 +364,7 @@ Table Parameters: numFiles 1 numRows 0 rawDataSize 0 - totalSize 1554 + totalSize 1555 transactional true #### A masked pattern was here #### @@ -392,9 +392,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_ivot - Statistics: Num rows: 1 Data size: 1554 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1555 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 1554 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1555 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -486,7 +486,7 @@ Table Parameters: numFiles 2 numRows 0 rawDataSize 0 - totalSize 3109 + totalSize 3110 transactional true #### A masked pattern was here #### @@ -514,9 +514,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_ivot - Statistics: Num rows: 1 Data size: 3109 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 3110 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 3109 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 3110 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -606,7 +606,7 @@ Table Parameters: numFiles 3 numRows 0 rawDataSize 0 - totalSize 298592 + totalSize 298593 transactional true #### A masked pattern was here #### @@ -634,9 +634,9 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_ivot - Statistics: Num rows: 1 Data size: 298592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 298593 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 298592 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 298593 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -745,7 +745,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out index c2732c8896..fec7d484fa 100644 --- ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out +++ ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out @@ -99,7 +99,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -130,6 +131,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: change, num + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(change, 'hll'), compute_stats(num, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -142,7 +158,34 @@ STAGE PLANS: name: default.temp1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: change, num + Column Types: string, string + Table: default.temp1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: CREATE TABLE temp2 ( @@ -177,7 +220,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -209,6 +253,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: create_ts, change, num + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(create_ts, 'hll'), compute_stats(change, 'hll'), compute_stats(num, 'hll') + mode: hash + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -221,7 +280,34 @@ STAGE PLANS: name: default.temp2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: create_ts, change, num + Column Types: string, string, string + Table: default.temp2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/join14.q.out ql/src/test/results/clientpositive/join14.q.out index 66e42f1992..cb456e23d7 100644 --- ql/src/test/results/clientpositive/join14.q.out +++ ql/src/test/results/clientpositive/join14.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -87,7 +103,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + 
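[Note: every new aggregation in these plans is compute_stats(col, 'hll'), that is, NDV estimation via a HyperLogLog-style sketch. Such sketches merge by taking a register-wise max, which is exactly why the hash/mergepartial split works. A deliberately tiny toy version (16 registers, raw estimate plus the standard linear-counting fallback), emphatically not Hive's implementation:

    import java.util.List;

    // Toy HyperLogLog-flavoured sketch: 2^4 registers, each keeping the maximum
    // leading-zero rank observed for hashes routed to it.
    class ToyHll {
        final int[] regs = new int[16];

        void add(String v) {
            int h = v.hashCode() * 0x9E3779B9;                    // cheap avalanche
            int idx = h >>> 28;                                   // top 4 bits pick a register
            int rank = Integer.numberOfLeadingZeros(h << 4) + 1;  // rank of the rest
            regs[idx] = Math.max(regs[idx], rank);
        }

        void merge(ToyHll o) {                                    // union = register-wise max
            for (int i = 0; i < regs.length; i++) regs[i] = Math.max(regs[i], o.regs[i]);
        }

        double estimate() {
            double sum = 0; int zeros = 0;
            for (int r : regs) { sum += Math.pow(2, -r); if (r == 0) zeros++; }
            double raw = 0.673 * regs.length * regs.length / sum;
            // standard small-range correction: fall back to linear counting
            return (zeros > 0 && raw < 2.5 * regs.length)
                ? regs.length * Math.log((double) regs.length / zeros)
                : raw;
        }
    }

    public class NdvSketch {
        public static void main(String[] args) {
            ToyHll a = new ToyHll(), b = new ToyHll();
            List.of("ant", "bee", "cat").forEach(a::add);   // mapper 1 partial
            List.of("cat", "dog").forEach(b::add);          // mapper 2 partial
            a.merge(b);                                     // the mergepartial step
            System.out.printf("estimated NDV = %.1f (true NDV: 4)%n", a.estimate());
        }
    }
]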
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value diff --git ql/src/test/results/clientpositive/join17.q.out ql/src/test/results/clientpositive/join17.q.out index f9edc792eb..e6aac08a05 100644 --- ql/src/test/results/clientpositive/join17.q.out +++ ql/src/test/results/clientpositive/join17.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -162,6 +163,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -192,8 +220,83 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + 
columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* diff --git ql/src/test/results/clientpositive/join2.q.out ql/src/test/results/clientpositive/join2.q.out index e3d26a29a6..b2067421ec 100644 --- ql/src/test/results/clientpositive/join2.q.out +++ ql/src/test/results/clientpositive/join2.q.out @@ -129,7 +129,8 @@ STAGE PLANS: name: default.dest_j2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value diff --git ql/src/test/results/clientpositive/join25.q.out ql/src/test/results/clientpositive/join25.q.out index 5ad95c507b..30a62ce00e 100644 --- ql/src/test/results/clientpositive/join25.q.out +++ ql/src/test/results/clientpositive/join25.q.out @@ -17,13 +17,14 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +80,21 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,7 +109,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value diff --git ql/src/test/results/clientpositive/join26.q.out ql/src/test/results/clientpositive/join26.q.out index b41fd8efe1..870a735d8e 100644 --- ql/src/test/results/clientpositive/join26.q.out +++ ql/src/test/results/clientpositive/join26.q.out @@ -19,13 +19,14 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.key = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:y @@ -74,7 +75,7 @@ STAGE PLANS: 2 _col0 (type: string) Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -136,6 +137,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 
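[Note: the local-work stage that just got renumbered (Stage-5 to Stage-6) is the usual map-join build. It loads the MAPJOIN-hinted small table into an in-memory hashtable so every mapper can probe it while streaming the big side, which is what the "keys: 0 _col0 / 1 _col0" entries describe. A minimal sketch of that broadcast hash join in plain Java, with invented rows:

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class MapJoinSketch {
        public static void main(String[] args) {
            // "local work": load the small (src1-like) table into a hashtable
            Map<String, String> small = new HashMap<>();
            small.put("128", "val_128");
            small.put("146", "val_146");

            // map phase: stream the big (src-like) table and probe per row
            List<String[]> big = List.of(
                new String[]{"128", "big_128"},
                new String[]{"150", "big_150"},   // no match: dropped (inner join)
                new String[]{"146", "big_146"});

            for (String[] row : big) {
                String match = small.get(row[0]);  // join key: _col0 on both sides
                if (match != null)
                    System.out.println(row[0] + "\t" + match + "\t" + row[1]);
            }
        }
    }
]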
'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -313,8 +341,83 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: 
false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/join27.q.out ql/src/test/results/clientpositive/join27.q.out index 8b43f3f32a..a8f4ff4389 100644 --- ql/src/test/results/clientpositive/join27.q.out +++ ql/src/test/results/clientpositive/join27.q.out @@ -17,13 +17,14 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +80,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,7 +109,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value diff --git ql/src/test/results/clientpositive/join28.q.out ql/src/test/results/clientpositive/join28.q.out index 309bdcd200..3b29191e67 100644 --- ql/src/test/results/clientpositive/join28.q.out +++ ql/src/test/results/clientpositive/join28.q.out @@ -23,13 +23,14 @@ 
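[Note: the dependency edit is the same in every one of these files. The column-stats job hangs off the stage that produced the data, and the Stats Work stage now waits for both the Move and the stats aggregation, as in "Stage-2 depends on stages: Stage-0, Stage-3" with "Stage-3 depends on stages: Stage-1". The fork-join shape, sketched with JDK futures (the stage bodies are placeholders):

    import java.util.concurrent.CompletableFuture;

    public class StageDagSketch {
        static CompletableFuture<Void> stage(String name) {
            return CompletableFuture.runAsync(() -> System.out.println("ran " + name));
        }

        public static void main(String[] args) {
            // Stage-1 is a root stage
            CompletableFuture<Void> s1 = stage("Stage-1 (insert query)");
            // Stage-0 depends on stages: Stage-1
            CompletableFuture<Void> s0 = s1.thenCompose(v -> stage("Stage-0 (move)"));
            // Stage-3 depends on stages: Stage-1
            CompletableFuture<Void> s3 = s1.thenCompose(v -> stage("Stage-3 (column stats MR)"));
            // Stage-2 depends on stages: Stage-0, Stage-3
            CompletableFuture.allOf(s0, s3)
                .thenCompose(v -> stage("Stage-2 (Stats Work)"))
                .join();
        }
    }
]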
FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -70,7 +71,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -115,6 +116,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -129,7 +145,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value diff --git ql/src/test/results/clientpositive/join29.q.out ql/src/test/results/clientpositive/join29.q.out index ef02385fe1..02521dda72 100644 --- ql/src/test/results/clientpositive/join29.q.out +++ ql/src/test/results/clientpositive/join29.q.out @@ -20,15 +20,16 @@ FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 - Stage-8 has a backup stage: Stage-2 - Stage-5 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2, Stage-5, Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 + Stage-0 
depends on stages: Stage-2, Stage-6, Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-10 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-10 Stage-2 - Stage-4 is a root stage + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -66,10 +67,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -83,7 +84,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -107,6 +108,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt1, 'hll'), compute_stats(cnt2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -121,9 +137,36 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt1, cnt2 + Column Types: string, int, int + Table: default.dest_j1 - Stage: Stage-9 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -137,7 +180,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -161,6 +204,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key, 'hll'), compute_stats(cnt1, 'hll'), compute_stats(cnt2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -202,8 +260,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt1, 'hll'), compute_stats(cnt2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/join3.q.out ql/src/test/results/clientpositive/join3.q.out index fb378f403a..87fb0574b0 100644 --- ql/src/test/results/clientpositive/join3.q.out +++ ql/src/test/results/clientpositive/join3.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -92,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,7 +120,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value diff --git ql/src/test/results/clientpositive/join30.q.out ql/src/test/results/clientpositive/join30.q.out index f06c70ae40..d29ef1b2a6 100644 --- ql/src/test/results/clientpositive/join30.q.out +++ ql/src/test/results/clientpositive/join30.q.out @@ -15,13 +15,14 @@ INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -110,7 +126,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key diff --git ql/src/test/results/clientpositive/join31.q.out 
ql/src/test/results/clientpositive/join31.q.out index e055b5d2fa..abcb32fa71 100644 --- ql/src/test/results/clientpositive/join31.q.out +++ ql/src/test/results/clientpositive/join31.q.out @@ -22,10 +22,11 @@ group by subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-8 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5 + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -60,7 +61,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:y @@ -133,6 +134,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -145,7 +161,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.dest_j1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt diff --git ql/src/test/results/clientpositive/join32.q.out ql/src/test/results/clientpositive/join32.q.out index 176989caff..47c88b82c3 100644 --- ql/src/test/results/clientpositive/join32.q.out +++ ql/src/test/results/clientpositive/join32.q.out @@ -19,13 +19,14 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: 
Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -120,7 +121,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -189,6 +190,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -366,8 +394,83 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/join33.q.out ql/src/test/results/clientpositive/join33.q.out index 176989caff..47c88b82c3 100644 --- ql/src/test/results/clientpositive/join33.q.out +++ ql/src/test/results/clientpositive/join33.q.out @@ -19,13 +19,14 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -120,7 +121,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -189,6 +190,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -366,8 +394,83 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + 
Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/join34.q.out ql/src/test/results/clientpositive/join34.q.out index 67599bc991..7675ad9870 100644 --- ql/src/test/results/clientpositive/join34.q.out +++ ql/src/test/results/clientpositive/join34.q.out @@ -27,13 +27,14 @@ FROM JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 - Stage-2 depends on stages: Stage-0 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -59,7 +60,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -121,6 +122,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: 
key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -180,6 +208,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -309,8 +364,83 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
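[Note: in the union plans (join34, join35) the identical stats branch is attached under each TableScan, so every union branch emits its own partial structs and the lone Stage-3 reducer merges them. The q.out masks the struct members, so the field set below is an assumption about what a merged string-column stat might carry; the NDV merge rule shown is a crude stand-in for the real bit-vector union:

    // Assumed shape of a merged string-column stat; the plans above only show
    // the column as an opaque "struct".
    record StringColStats(long maxLength, double sumLength, long rows, long nulls, double ndv) {
        StringColStats merge(StringColStats o) {
            return new StringColStats(
                Math.max(maxLength, o.maxLength),
                sumLength + o.sumLength,
                rows + o.rows,
                nulls + o.nulls,
                Math.max(ndv, o.ndv));   // crude: real sketches union their registers
        }
        double avgLength() { return rows == 0 ? 0 : sumLength / rows; }
    }

    public class UnionBranchMerge {
        public static void main(String[] args) {
            StringColStats branch1 = new StringColStats(7, 600, 100, 2, 40);
            StringColStats branch2 = new StringColStats(9, 450, 80, 0, 55);
            StringColStats merged = branch1.merge(branch2);
            System.out.println(merged + " avgLength=" + merged.avgLength());
        }
    }
]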
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value diff --git ql/src/test/results/clientpositive/join35.q.out ql/src/test/results/clientpositive/join35.q.out index ade6646caa..4590ef946d 100644 --- ql/src/test/results/clientpositive/join35.q.out +++ ql/src/test/results/clientpositive/join35.q.out @@ -28,11 +28,12 @@ JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-0 - Stage-4 is a root stage + Stage-8 depends on stages: Stage-1, Stage-5 + Stage-7 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-7 + Stage-3 depends on stages: Stage-0, Stage-4 + Stage-4 depends on stages: Stage-7 + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -139,7 +140,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -165,7 +166,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -217,6 +218,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan GatherStats: false Union @@ -266,6 +294,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -295,7 +350,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10004 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -393,13 +448,88 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, int + Table: default.dest_j1 + Is Table Level Stats: true Stage: Stage-4 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false diff --git ql/src/test/results/clientpositive/join36.q.out ql/src/test/results/clientpositive/join36.q.out index 43a091f629..40dbabb1d0 100644 --- ql/src/test/results/clientpositive/join36.q.out +++ ql/src/test/results/clientpositive/join36.q.out @@ -57,13 +57,14 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -86,7 +87,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -119,6 +120,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -133,7 +149,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, int, int + Table: 
default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt diff --git ql/src/test/results/clientpositive/join37.q.out ql/src/test/results/clientpositive/join37.q.out index b0a2ee3dfa..4effb31d9a 100644 --- ql/src/test/results/clientpositive/join37.q.out +++ ql/src/test/results/clientpositive/join37.q.out @@ -17,13 +17,14 @@ SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +80,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,7 +109,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value diff --git ql/src/test/results/clientpositive/join39.q.out ql/src/test/results/clientpositive/join39.q.out index c656762675..80272a4caf 100644 --- ql/src/test/results/clientpositive/join39.q.out +++ ql/src/test/results/clientpositive/join39.q.out @@ -17,13 +17,14 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src @@ -46,7 +47,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -72,6 +73,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, key1, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(key1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -86,7 +102,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, key1, val2 + Column Types: string, string, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value diff --git ql/src/test/results/clientpositive/join4.q.out ql/src/test/results/clientpositive/join4.q.out index 10b7fbd965..2d84b2e185 100644 --- ql/src/test/results/clientpositive/join4.q.out +++ ql/src/test/results/clientpositive/join4.q.out @@ -39,7 +39,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -110,7 +126,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join5.q.out ql/src/test/results/clientpositive/join5.q.out index 105a731526..dc9ff64664 100644 --- ql/src/test/results/clientpositive/join5.q.out +++ ql/src/test/results/clientpositive/join5.q.out @@ -39,7 +39,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 
depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -110,7 +126,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join6.q.out ql/src/test/results/clientpositive/join6.q.out index 77c8c3a322..b1c9be61c6 100644 --- ql/src/test/results/clientpositive/join6.q.out +++ ql/src/test/results/clientpositive/join6.q.out @@ -39,7 +39,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -110,7 +126,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join7.q.out ql/src/test/results/clientpositive/join7.q.out index fec67b2651..3be60e515b 100644 --- ql/src/test/results/clientpositive/join7.q.out +++ ql/src/test/results/clientpositive/join7.q.out @@ -49,7 +49,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -126,6 +127,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -138,7 +154,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 
(type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join8.q.out ql/src/test/results/clientpositive/join8.q.out index 185b5421a3..adca8b262a 100644 --- ql/src/test/results/clientpositive/join8.q.out +++ ql/src/test/results/clientpositive/join8.q.out @@ -39,7 +39,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -101,6 +102,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -113,7 +129,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/join9.q.out ql/src/test/results/clientpositive/join9.q.out index a96f341c78..6db5185b20 100644 --- ql/src/test/results/clientpositive/join9.q.out +++ ql/src/test/results/clientpositive/join9.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -210,6 +211,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -240,8 +268,83 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: #### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' diff --git ql/src/test/results/clientpositive/join_map_ppr.q.out ql/src/test/results/clientpositive/join_map_ppr.q.out index a4d414089e..c5acb9b351 100644 --- ql/src/test/results/clientpositive/join_map_ppr.q.out +++ ql/src/test/results/clientpositive/join_map_ppr.q.out @@ -131,6 +131,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -186,6 +202,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -225,8 +270,14 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-4 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + 
Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true Stage: Stage-5 Map Reduce @@ -688,7 +739,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -709,6 +760,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -764,6 +831,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -783,7 +879,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -803,8 +899,14 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-4 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true Stage: Stage-5 Map Reduce @@ -820,7 +922,7 @@ STAGE PLANS: input 
format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -850,7 +952,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -871,7 +973,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -907,7 +1009,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -937,7 +1039,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -958,7 +1060,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 diff --git ql/src/test/results/clientpositive/lb_fs_stats.q.out ql/src/test/results/clientpositive/lb_fs_stats.q.out index b07192b895..9bc96e4685 100644 --- ql/src/test/results/clientpositive/lb_fs_stats.q.out +++ ql/src/test/results/clientpositive/lb_fs_stats.q.out @@ -46,7 +46,7 @@ Database: default Table: test_tab #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 4812 diff --git ql/src/test/results/clientpositive/limit_pushdown_negative.q.out ql/src/test/results/clientpositive/limit_pushdown_negative.q.out index eeb47f7fe5..7bcde29a90 100644 --- ql/src/test/results/clientpositive/limit_pushdown_negative.q.out +++ ql/src/test/results/clientpositive/limit_pushdown_negative.q.out @@ -226,10 +226,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, 
Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -269,6 +271,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -296,7 +313,12 @@ STAGE PLANS: name: default.dest_2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_2 Stage: Stage-4 Map Reduce @@ -304,8 +326,37 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_3 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Select Operator @@ -327,6 +378,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -338,6 +404,25 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/lineage1.q.out ql/src/test/results/clientpositive/lineage1.q.out index 6c8a22fa9c..beee1294d5 100644 --- ql/src/test/results/clientpositive/lineage1.q.out +++ ql/src/test/results/clientpositive/lineage1.q.out @@ -111,6 +111,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE @@ -126,6 +139,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -147,7 +186,12 @@ STAGE PLANS: name: default.dest_l1 
Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_l1 Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/list_bucket_dml_1.q.out ql/src/test/results/clientpositive/list_bucket_dml_1.q.out index f70f8b2b9a..e3a26fb34b 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_1.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_1.q.out @@ -65,6 +65,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +186,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -197,8 +250,14 @@ STAGE PLANS: name: default.list_bucketing_dynamic_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false PREHOOK: query: insert overwrite 
table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY @@ -240,7 +299,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -285,7 +344,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -357,7 +416,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_11.q.out ql/src/test/results/clientpositive/list_bucket_dml_11.q.out index 6920dcc7ca..405f7afc2a 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_11.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_11.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +139,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns 
_col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -150,8 +203,14 @@ STAGE PLANS: name: default.list_bucketing_static_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src @@ -195,7 +254,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 @@ -238,7 +297,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_12.q.out ql/src/test/results/clientpositive/list_bucket_dml_12.q.out index 55acbb7f5a..ffde804ff9 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_12.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +139,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -150,8 +203,14 @@ STAGE PLANS: name: default.list_bucketing_mul_col Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') select 1, key, 1, value, 1 from src @@ -201,7 +260,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 numRows 500 rawDataSize 6312 @@ -246,7 +305,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 @@ -338,7 +397,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git ql/src/test/results/clientpositive/list_bucket_dml_13.q.out ql/src/test/results/clientpositive/list_bucket_dml_13.q.out index 3a1d2a436a..091f6adb0a 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_13.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_13.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE + Group By Operator + aggregations: compute_stats(col1, 'hll'), compute_stats(col2, 'hll'), compute_stats(col3, 'hll'), compute_stats(col4, 'hll'), compute_stats(col5, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +139,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -150,8 +203,14 @@ STAGE PLANS: name: default.list_bucketing_mul_col Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') select 1, key, 1, value, 1 from src @@ -201,7 +260,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 
numRows 500 rawDataSize 6312 @@ -246,7 +305,7 @@ STAGE PLANS: ds 2008-04-08 hr 2013-01-23+18:00:99 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git ql/src/test/results/clientpositive/list_bucket_dml_14.q.out ql/src/test/results/clientpositive/list_bucket_dml_14.q.out index f827991243..ca96d84180 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_14.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_14.q.out @@ -65,6 +65,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -116,6 +132,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -146,8 +191,14 @@ STAGE PLANS: name: default.list_bucketing Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing + Is Table Level Stats: true PREHOOK: query: insert overwrite table list_bucketing select * from src PREHOOK: type: QUERY @@ -177,7 +228,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -283,7 +334,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -304,7 +355,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_2.q.out ql/src/test/results/clientpositive/list_bucket_dml_2.q.out index 01bc19cbbc..087ef7f9bd 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_2.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -201,8 +254,14 
@@ STAGE PLANS: name: default.list_bucketing_static_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' @@ -250,7 +309,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -311,7 +370,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_3.q.out ql/src/test/results/clientpositive/list_bucket_dml_3.q.out index 52646a2609..12d633386b 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_3.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_3.q.out @@ -59,6 +59,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +180,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -191,8 +244,14 @@ STAGE PLANS: name: default.list_bucketing_static_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds='2008-04-08', hr='11') select key, value from srcpart where ds='2008-04-08' PREHOOK: type: QUERY @@ -231,7 +290,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 10624 @@ -303,7 +362,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_4.q.out ql/src/test/results/clientpositive/list_bucket_dml_4.q.out index 84ada2bd49..23a9c33c6b 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_4.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_4.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), 
_col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -201,8 +254,14 @@ STAGE PLANS: name: default.list_bucketing_static_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' @@ -250,7 +309,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -331,6 +390,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -433,6 +511,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 
(type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -471,8 +583,14 @@ STAGE PLANS: name: default.list_bucketing_static_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false Stage: Stage-3 Merge File Operator @@ -624,7 +742,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 9624 @@ -685,7 +803,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_5.q.out ql/src/test/results/clientpositive/list_bucket_dml_5.q.out index 71d7e16409..983a660bcc 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_5.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_5.q.out @@ -65,6 +65,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +186,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce 
Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -197,8 +250,14 @@ STAGE PLANS: name: default.list_bucketing_dynamic_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY @@ -240,7 +299,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -285,7 +344,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -362,7 +421,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -408,7 +467,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.out index 8e20ff1301..189dec8208 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_6.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_6.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 
_col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -201,8 +254,14 @@ STAGE PLANS: name: default.list_bucketing_dynamic_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' @@ -254,7 +313,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +356,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition 
Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 984 rawDataSize 9488 @@ -379,6 +438,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,6 +559,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -520,8 +632,14 @@ STAGE PLANS: name: default.list_bucketing_dynamic_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false Stage: Stage-3 Merge File Operator @@ -681,7 +799,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -724,7 +842,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -785,7 +903,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -831,7 +949,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_7.q.out ql/src/test/results/clientpositive/list_bucket_dml_7.q.out index 58bf240f1b..149b5ebcc0 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_7.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_7.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +190,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types 
struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -201,8 +254,14 @@ STAGE PLANS: name: default.list_bucketing_dynamic_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' @@ -254,7 +313,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +356,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -379,6 +438,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,6 +559,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -520,8 +632,14 @@ STAGE PLANS: name: default.list_bucketing_dynamic_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false Stage: Stage-3 Merge File Operator @@ -681,7 +799,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -724,7 +842,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 984 rawDataSize 9488 @@ -785,7 +903,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -831,7 +949,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/list_bucket_dml_8.q.out ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index b38d332e09..7b0f6f36f3 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -69,6 +69,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: 
struct), _col3 (type: struct)
+                      auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -171,6 +190,40 @@ STAGE PLANS:
      Truncated Path -> Alias:
        /srcpart/ds=2008-04-08/hr=11 [srcpart]
        /srcpart/ds=2008-04-08/hr=12 [srcpart]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types struct:struct:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -201,8 +254,14 @@ STAGE PLANS:
          name: default.list_bucketing_dynamic_part

  Stage: Stage-2
-    Stats-Aggr Operator
-#### A masked pattern was here ####
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.list_bucketing_dynamic_part
+          Is Table Level Stats: false

PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr)
select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08'
@@ -254,7 +313,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 2
 numRows 16
 rawDataSize 136
@@ -297,7 +356,7 @@ Database: default
Table: list_bucketing_dynamic_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 6
 numRows 984
 rawDataSize 9488
@@ -414,7 +473,7 @@ STAGE PLANS:
              ds 2008-04-08
              hr a1
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
diff --git ql/src/test/results/clientpositive/list_bucket_dml_9.q.out ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
index 624e3aca41..78fdb0bf35 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_9.q.out
@@ -69,6 +69,25 @@ STAGE PLANS:
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+              outputColumnNames: key, value, ds, hr
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                keys: ds (type: string), hr (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  null sort order: aa
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col2 (type: struct), _col3 (type: struct)
+                  auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -171,6 +190,40 @@ STAGE PLANS:
      Truncated Path -> Alias:
        /srcpart/ds=2008-04-08/hr=11 [srcpart]
        /srcpart/ds=2008-04-08/hr=12 [srcpart]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types struct:struct:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -201,8 +254,14 @@ STAGE PLANS:
          name: default.list_bucketing_static_part

  Stage: Stage-2
-    Stats-Aggr Operator
-#### A masked pattern was here ####
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.list_bucketing_static_part
+          Is Table Level Stats: false

PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
select key, value from srcpart where ds = '2008-04-08'
@@ -250,7 +309,7 @@ Database: default
Table: list_bucketing_static_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 6
 numRows 1000
 rawDataSize 9624
@@ -331,6 +390,25 @@ STAGE PLANS:
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+              outputColumnNames: key, value, ds, hr
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                keys: ds (type: string), hr (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  null sort order: aa
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col2 (type: struct), _col3 (type: struct)
+                  auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -433,6 +511,40 @@ STAGE PLANS:
      Truncated Path -> Alias:
        /srcpart/ds=2008-04-08/hr=11 [srcpart]
        /srcpart/ds=2008-04-08/hr=12 [srcpart]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3
+                    columns.types struct:struct:string:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false

  Stage: Stage-7
    Conditional Operator
@@ -471,8 +583,14 @@ STAGE PLANS:
          name: default.list_bucketing_static_part

  Stage: Stage-2
-    Stats-Aggr Operator
-#### A masked pattern was here ####
+    Stats Work
+      Basic Stats Work:
+#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.list_bucketing_static_part
+          Is Table Level Stats: false

  Stage: Stage-3
    Merge File Operator
@@ -624,7 +742,7 @@ Database: default
Table: list_bucketing_static_part
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 4
 numRows 1000
 rawDataSize 9624
@@ -685,7 +803,7 @@ STAGE PLANS:
              ds 2008-04-08
              hr 11
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
index 17c7afe411..98b9b1304a 100644
--- ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
+++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out
@@ -47,7 +47,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 3
 numRows 500
 rawDataSize 5312
@@ -97,7 +97,7 @@ STAGE PLANS:
              ds 1
              hr 4
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -181,7 +181,7 @@ STAGE PLANS:
              ds 1
              hr 4
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -266,7 +266,7 @@ STAGE PLANS:
              ds 1
              hr 4
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -349,7 +349,7 @@ STAGE PLANS:
              ds 1
              hr 4
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
index 5a326f667c..4cafefa812 100644
--- ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
+++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out
@@ -47,7 +47,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 3
 numRows 500
 rawDataSize 5312
@@ -97,7 +97,7 @@ STAGE PLANS:
              ds 1
              hr 4
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -181,7 +181,7 @@ STAGE PLANS:
              ds 1
              hr 4
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -268,7 +268,7 @@ STAGE PLANS:
              ds 1
              hr 4
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
index 3ff221da35..9931afd435 100644
--- ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
+++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out
@@ -43,7 +43,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 1
 numRows 500
 rawDataSize 5312
@@ -103,7 +103,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 3
 numRows 500
 rawDataSize 5312
@@ -168,7 +168,7 @@ Database: default
Table: fact_daily
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 2
 numRows 500
 rawDataSize 5312
@@ -211,7 +211,7 @@ STAGE PLANS:
              ds 1
              hr 1
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -322,7 +322,7 @@ STAGE PLANS:
              ds 1
              hr 2
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
@@ -408,7 +408,7 @@ STAGE PLANS:
              ds 1
              hr 3
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value
diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
index 34dd4877e8..bd02c52ffd 100644
--- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
+++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out
@@ -168,7 +168,8 @@ STAGE PLANS:
          name: default.srcpart_acid

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:

PREHOOK: query: update srcpart_acid set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'
PREHOOK: type: QUERY
@@ -346,7 +347,8 @@ STAGE PLANS:
          name: default.srcpart_acid

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:

PREHOOK: query: delete from srcpart_acid where key in( '1001', '213', '43')
PREHOOK: type: QUERY
@@ -662,7 +664,8 @@ STAGE PLANS:
          name: default.srcpart_acidb

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:

PREHOOK: query: update srcpart_acidb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'
PREHOOK: type: QUERY
@@ -841,7 +844,8 @@ STAGE PLANS:
          name: default.srcpart_acidb

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:

PREHOOK: query: delete from srcpart_acidb where key in( '1001', '213', '43')
PREHOOK: type: QUERY
@@ -1156,7 +1160,8 @@ STAGE PLANS:
          name: default.srcpart_acidv

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:

PREHOOK: query: update srcpart_acidv set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'
PREHOOK: type: QUERY
@@ -1334,7 +1339,8 @@ STAGE PLANS:
          name: default.srcpart_acidv

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:

PREHOOK: query: delete from srcpart_acidv where key in( '1001', '213', '43')
PREHOOK: type: QUERY
@@ -1650,7 +1656,8 @@ STAGE PLANS:
          name: default.srcpart_acidvb

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:

PREHOOK: query: update srcpart_acidvb set value = concat(value, 'updated') where cast(key as integer) in(413,43) and hr='11'
PREHOOK: type: QUERY
@@ -1829,7 +1836,8 @@ STAGE PLANS:
          name: default.srcpart_acidvb

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:

PREHOOK: query: delete from srcpart_acidvb where key in( '1001', '213', '43')
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out
index 0d5ba01960..f0cf74ca81 100644
--- ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out
+++ ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out
@@ -89,7 +89,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 3
 numRows 1500
 rawDataSize 141000
@@ -241,7 +241,7 @@ Database: default
Table: src_orc_merge_test_part_stat
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 3
 numRows 1500
 rawDataSize 141000
@@ -290,7 +290,7 @@ Database: default
Table: src_orc_merge_test_part_stat
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 3
 numRows 1500
 rawDataSize 141000
diff --git ql/src/test/results/clientpositive/llap/autoColumnStats_10.q.out ql/src/test/results/clientpositive/llap/autoColumnStats_10.q.out
new file mode 100644
index 0000000000..09529d26e5
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/autoColumnStats_10.q.out
@@ -0,0 +1,452 @@
+PREHOOK: query: drop table p
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table p
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@p
+POSTHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@p
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type comment
+
+insert_num int
+c1 tinyint
+c2 smallint
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}}
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ totalSize 0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: insert into p values (1,22,333)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p values (1,22,333)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p
+POSTHOOK: Lineage: p.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: p.c2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type comment
+
+insert_num int
+c1 tinyint
+c2 smallint
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}}
+ numFiles 1
+ numRows 1
+ rawDataSize 8
+ totalSize 9
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@p
+PREHOOK: Output: default@p
+POSTHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@p
+POSTHOOK: Output: default@p
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type comment
+
+insert_num int
+c1 string
+c2 string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
+#### A masked pattern was here ####
+ numFiles 1
+ numRows 1
+ rawDataSize 8
+ totalSize 9
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted p insert_num
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p insert_num
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+insert_num int 1 1 0 1 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
+PREHOOK: query: desc formatted p c1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p c1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+c1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
+PREHOOK: query: insert into p values (2,11,111)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p values (2,11,111)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p
+POSTHOOK: Lineage: p.c1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: p.c2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type comment
+
+insert_num int
+c1 string
+c2 string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
+#### A masked pattern was here ####
+ numFiles 2
+ numRows 2
+ rawDataSize 16
+ totalSize 18
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted p insert_num
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p insert_num
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+insert_num int 1 2 0 1 from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
+PREHOOK: query: desc formatted p c1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p c1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+c1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"insert_num\":\"true\"}}
+PREHOOK: query: drop table p
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@p
+PREHOOK: Output: default@p
+POSTHOOK: query: drop table p
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@p
+POSTHOOK: Output: default@p
+PREHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@p
+POSTHOOK: query: CREATE TABLE p(insert_num int, c1 tinyint, c2 smallint)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@p
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type comment
+
+insert_num int
+c1 tinyint
+c2 smallint
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\",\"insert_num\":\"true\"}}
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ totalSize 0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: insert into p values (1,22,333)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p values (1,22,333)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p
+POSTHOOK: Lineage: p.c1 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: p.c2 EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type comment
+
+insert_num int
+c1 tinyint
+c2 smallint
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ numFiles 1
+ numRows 1
+ rawDataSize 8
+ totalSize 9
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@p
+PREHOOK: Output: default@p
+POSTHOOK: query: alter table p replace columns (insert_num int, c1 STRING, c2 STRING)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@p
+POSTHOOK: Output: default@p
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type comment
+
+insert_num int
+c1 string
+c2 string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+#### A masked pattern was here ####
+ numFiles 1
+ numRows 1
+ rawDataSize 8
+ totalSize 9
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted p insert_num
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p insert_num
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+insert_num int from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: desc formatted p c1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p c1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+c1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: insert into p values (2,11,111)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@p
+POSTHOOK: query: insert into p values (2,11,111)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@p
+POSTHOOK: Lineage: p.c1 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: p.c2 SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: p.insert_num EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: desc formatted p
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type comment
+
+insert_num int
+c1 string
+c2 string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+#### A masked pattern was here ####
+ numFiles 2
+ numRows 2
+ rawDataSize 16
+ totalSize 18
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: desc formatted p insert_num
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p insert_num
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+insert_num int from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+PREHOOK: query: desc formatted p c1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@p
+POSTHOOK: query: desc formatted p c1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@p
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+c1 string from deserializer
+COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
diff --git ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
index a61eafadc9..25cd3756d5 100644
--- ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
+++ ql/src/test/results/clientpositive/llap/autoColumnStats_2.q.out
@@ -687,7 +687,6 @@ Database: default
Table: alter5
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col1\":\"true\"}}
 numFiles 1
 totalSize 1906
#### A masked pattern was here ####
diff --git ql/src/test/results/clientpositive/llap/bucket2.q.out ql/src/test/results/clientpositive/llap/bucket2.q.out
index e0c92ced9f..10fc171ae1 100644
--- ql/src/test/results/clientpositive/llap/bucket2.q.out
+++ ql/src/test/results/clientpositive/llap/bucket2.q.out
@@ -140,6 +140,41 @@ STAGE PLANS:
              TotalFiles: 2
              GatherStats: true
              MultiFileSpray: true
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: struct), _col1 (type: struct)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1
+                          columns.types struct:struct
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false

  Stage: Stage-2
    Dependency Collection
@@ -174,8 +209,14 @@ STAGE PLANS:
          name: default.bucket2_1

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
#### A masked pattern was here ####
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.bucket2_1
+          Is Table Level Stats: true

PREHOOK: query: insert overwrite table bucket2_1
select * from src
diff --git ql/src/test/results/clientpositive/llap/bucket3.q.out ql/src/test/results/clientpositive/llap/bucket3.q.out
index 8e6d85ca80..9ae5166ded 100644
--- ql/src/test/results/clientpositive/llap/bucket3.q.out
+++ ql/src/test/results/clientpositive/llap/bucket3.q.out
@@ -26,6 +26,7 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -138,6 +139,61 @@ STAGE PLANS:
              TotalFiles: 2
              GatherStats: true
              MultiFileSpray: true
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+              outputColumnNames: key, value, ds
+              Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                keys: ds (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE
+                  tag: -1
+                  value expressions: _col1 (type: struct), _col2 (type: struct)
+                  auto parallelism: true
+        Reducer 3
Stage: Stage-2 Dependency Collection @@ -177,8 +212,14 @@ STAGE PLANS: name: default.bucket4_1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket4_1 + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket4_1 select * from src diff --git ql/src/test/results/clientpositive/llap/bucket5.q.out ql/src/test/results/clientpositive/llap/bucket5.q.out index 0b5a14d6ff..1d399416f5 100644 --- ql/src/test/results/clientpositive/llap/bucket5.q.out +++ ql/src/test/results/clientpositive/llap/bucket5.q.out @@ -43,7 +43,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -173,10 +175,57 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: Select Operator expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -213,6 +262,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 5 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection @@ -248,8 +344,14 @@ STAGE PLANS: name: default.bucketed_table Stage: Stage-4 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucketed_table + Is Table Level Stats: true Stage: Stage-1 Move Operator @@ -280,8 +382,14 @@ STAGE PLANS: name: default.unbucketed_table Stage: Stage-5 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.unbucketed_table + Is Table Level Stats: true Stage: Stage-10 Conditional Operator @@ -514,7 +622,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} SORTBUCKETCOLSPREFIX TRUE numFiles 2 numRows 500 diff --git ql/src/test/results/clientpositive/llap/bucket6.q.out ql/src/test/results/clientpositive/llap/bucket6.q.out index 20895f8a9f..78c044d529 100644 --- ql/src/test/results/clientpositive/llap/bucket6.q.out +++ ql/src/test/results/clientpositive/llap/bucket6.q.out @@ -24,6 +24,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -58,6 +59,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + 
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-2
    Dependency Collection
@@ -73,7 +102,12 @@ STAGE PLANS:
          name: default.src_bucket

  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_bucket

PREHOOK: query: insert into table src_bucket select key,value from srcpart
PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/llap/bucket_groupby.q.out ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
index 1c6ab8c0d0..7557920a4f 100644
--- ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
+++ ql/src/test/results/clientpositive/llap/bucket_groupby.q.out
@@ -63,22 +63,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: clustergroupby
-                  Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: string)
                    outputColumnNames: key
-                    Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
                      keys: key (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
@@ -91,11 +91,11 @@ STAGE PLANS:
                  keys: KEY._col0 (type: string)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
-                    Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                    TopN Hash Memory Usage: 0.1
                    value expressions: _col1 (type: bigint)
        Reducer 3
@@ -104,13 +104,13 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 10
-                    Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -191,22 +191,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: clustergroupby
-                  Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: string)
                    outputColumnNames: key
-                    Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
                      keys: key (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
@@ -219,11 +219,11 @@ STAGE PLANS:
                  keys: KEY._col0 (type: string)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
-                    Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                    TopN Hash Memory Usage: 0.1
                    value expressions: _col1 (type: bigint)
        Reducer 3
@@ -232,13 +232,13 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 10
-                    Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -292,22 +292,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: clustergroupby
-                  Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: length(key) (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
                      keys: _col0 (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
@@ -320,13 +320,13 @@ STAGE PLANS:
                  keys: KEY._col0 (type: int)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 10
-                    Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -373,22 +373,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: clustergroupby
-                  Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: abs(length(key)) (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
                      keys: _col0 (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
@@ -401,13 +401,13 @@ STAGE PLANS:
                  keys: KEY._col0 (type: int)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 10
-                    Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -455,22 +455,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: clustergroupby
-                  Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: string)
                    outputColumnNames: key
-                    Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
                      keys: key (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
@@ -483,11 +483,11 @@ STAGE PLANS:
                  keys: KEY._col0 (type: string)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
-                    Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                    TopN Hash Memory Usage: 0.1
                    value expressions: _col1 (type: bigint)
        Reducer 3
@@ -496,13 +496,13 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 10
-                    Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -557,22 +557,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: clustergroupby
-                  Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: value (type: string)
                    outputColumnNames: value
-                    Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
                      keys: value (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
@@ -585,11 +585,11 @@ STAGE PLANS:
                  keys: KEY._col0 (type: string)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
-                    Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
                    TopN Hash Memory Usage: 0.1
                    value expressions: _col1 (type: bigint)
        Reducer 3
@@ -598,13 +598,13 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
                  Limit
                    Number of rows: 10
-                    Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -658,22 +658,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: clustergroupby
-                  Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                  Select Operator
                    expressions: key (type: string)
                    outputColumnNames: key
-                    Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                    Group By Operator
                      aggregations: count()
                      keys: key (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -685,10 +685,10 @@ STAGE PLANS:
                  keys: KEY._col0 (type: string)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 92804 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 500 Data size: 92804 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1043,22 +1043,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: clustergroupby
-                  Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                  Select Operator
                    expressions: key (type: string)
                    outputColumnNames: key
-                    Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL
                    Group By Operator
                      aggregations: count()
                      keys: key (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1000 Data size: 185608 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -1070,10 +1070,10 @@ STAGE PLANS:
                  keys: KEY._col0 (type: string)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 500 Data size: 92804 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 500 Data size: 92804 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1158,23 +1158,23 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: clustergroupby
-                  Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: string)
                    outputColumnNames: key
-                    Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count()
                      bucketGroup: true
                      keys: key (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
@@ -1187,11 +1187,11 @@ STAGE PLANS:
                  keys: KEY._col0 (type: string)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 250 Data size: 46448
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1200,13 +1200,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1261,22 +1261,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: value - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -1289,11 +1289,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1302,13 +1302,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data 
size: 1850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1464,23 +1464,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: key - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() bucketGroup: true keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -1493,11 +1493,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1506,13 +1506,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 46448 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 1850 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1567,22 +1567,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) 
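A second effect visible across these hunks is that Data size becomes rows times an estimated row width built from per-column averages, instead of a scan-level byte count. The widths back out consistently from the plans in this diff: roughly 87 bytes for the string key (43500 / 500), 91 for the string value (45500 / 500), 4 for an int key (968 / 242), and 8 for the bigint count, so 500 x (87 + 91) = 89000 and 250 x (87 + 91 + 8) = 46500. The sketch below uses those back-computed constants; they are read off this diff, not taken from Hive's JavaDataModel.

    import java.util.Map;

    // Per-column row widths back-computed from this diff; illustrative
    // constants only, not Hive's JavaDataModel values.
    public final class DataSizeEstimate {
        static final Map<String, Integer> WIDTH = Map.of(
                "key:string", 87,    // 43500 / 500
                "value:string", 91,  // 45500 / 500
                "key:int", 4,        // 968 / 242
                "count:bigint", 8);  // 8 / 1

        static long dataSize(long rows, String... cols) {
            long width = 0;
            for (String c : cols) {
                width += WIDTH.get(c);
            }
            return rows * width;
        }

        public static void main(String[] args) {
            System.out.println(dataSize(500, "key:string", "value:string"));                  // 89000
            System.out.println(dataSize(250, "key:string", "value:string", "count:bigint")); // 46500
        }
    }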
outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) Execution mode: llap @@ -1595,15 +1595,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1612,13 +1612,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 3600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 3600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/bucket_many.q.out ql/src/test/results/clientpositive/llap/bucket_many.q.out index b78cbaa0c3..22027457ed 100644 --- ql/src/test/results/clientpositive/llap/bucket_many.q.out +++ ql/src/test/results/clientpositive/llap/bucket_many.q.out @@ -26,6 +26,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -140,6 +141,53 @@ STAGE PLANS: TotalFiles: 256 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: 
COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -174,8 +222,14 @@ STAGE PLANS: name: default.bucket_many Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket_many + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket_many select * from src diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out index f76356658d..4cf44e8bdc 100644 --- ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out @@ -132,6 +132,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -140,11 +150,16 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value 
expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -152,6 +167,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +======= Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -166,6 +212,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -224,26 +271,51 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab_part +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Group By Operator keys: key (type: int), value (type: string) mode: hash outputColumnNames: _col0, _col1 +<<<<<<< HEAD + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) +<<<<<<< HEAD + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + 
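The Filter Operator hunks in this file show the other half of the change. Under Column stats: NONE, `key is not null` was assumed to drop about 5% of rows (500 to 475, 242 to 230), while with COMPLETE stats the recorded null count is applied directly, so a column with no nulls passes through unreduced (242 to 242). A sketch of that selectivity rule follows; the names are illustrative and the 5% fallback fraction is back-computed from these plans rather than read from Hive's configuration.

    // Sketch of "key is not null" selectivity as these plans suggest it:
    // with no column stats, assume a fixed null fraction; with COMPLETE
    // stats, subtract the recorded null count.
    public final class NotNullEstimate {
        static final double ASSUMED_NULL_FRACTION = 0.05; // matches 500 -> 475, 242 -> 230

        static long afterIsNotNull(long rows, Long numNullsFromStats) {
            if (numNullsFromStats == null) {             // Column stats: NONE
                return Math.round(rows * (1.0 - ASSUMED_NULL_FRACTION));
            }
            return rows - numNullsFromStats;             // COMPLETE: zero nulls recorded here
        }

        public static void main(String[] args) {
            System.out.println(afterIsNotNull(242, null)); // 230
            System.out.println(afterIsNotNull(242, 0L));   // 242
        }
    }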
outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -252,11 +324,16 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -266,11 +343,19 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 +<<<<<<< HEAD + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 237 Data size: 44935 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 237 Data size: 44935 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Map Join Operator condition map: Inner Join 0 to 1 @@ -279,15 +364,19 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 +<<<<<<< HEAD + Statistics: Num rows: 308 Data size: 2464 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 260 Data size: 49428 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -296,10 +385,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -361,6 +450,62 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + 
keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 1573 Data size: 12584 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -415,17 +560,32 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: d +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -434,11 +594,16 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -448,10 +613,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -517,6 +682,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -525,6 +700,7 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Map Join Operator condition map: Inner Join 0 to 1 @@ -534,11 +710,19 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 +<<<<<<< HEAD + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Map Join Operator condition map: Inner Join 0 to 1 @@ -547,15 +731,26 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 +<<<<<<< HEAD + Statistics: Num rows: 1573 Data size: 12584 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 574 Data size: 2186 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 +<<<<<<< HEAD + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -563,6 +758,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -571,17 +776,32 @@ STAGE PLANS: expressions: key (type: int) 
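One estimate that does not move anywhere in this file is the final count: an aggregation with no grouping keys produces exactly one output row, so "Num rows: 1 Data size: 8" (a single bigint) is the estimate on both the old and new sides of these hunks, regardless of whether column stats are NONE or COMPLETE. A trivial sketch of why, with illustrative names:

    // An aggregation without grouping keys yields exactly one row, so no
    // column statistics are needed to estimate it. Illustrative sketch.
    public final class UngroupedAggEstimate {
        static long estimateRows(boolean hasGroupByKeys, long groupedEstimate) {
            return hasGroupByKeys ? groupedEstimate : 1L;
        }

        public static void main(String[] args) {
            long rows = estimateRows(false, 0L);
            System.out.println(rows + " row, " + rows * 8L + " bytes"); // 1 row, 8 bytes (bigint)
        }
    }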
outputColumnNames: _col0 Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: d +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -590,11 +810,16 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 475 Data size: 1808 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -604,10 +829,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -695,6 +920,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -703,6 +938,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Map Join Operator condition map: Inner Join 0 to 1 @@ -712,6 +948,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 +<<<<<<< HEAD + Statistics: Num rows: 266 Data size: 25289 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: 
string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 266 Data size: 25289 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 266 Data size: 25289 Basic stats: COMPLETE Column stats: NONE +======= Statistics: Num rows: 253 Data size: 48084 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) @@ -720,6 +966,7 @@ STAGE PLANS: File Output Operator compressed: false Statistics: Num rows: 253 Data size: 48084 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -771,15 +1018,31 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: +<<<<<<< HEAD + Map 1 <- Reducer 3 (CUSTOM_EDGE) + Map 2 <- Map 4 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +======= Map 1 <- Reducer 4 (CUSTOM_EDGE) Map 3 <- Map 2 (CUSTOM_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) +>>>>>>> asf/master #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -788,6 +1051,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Map Join Operator condition map: Inner Join 0 to 1 @@ -796,6 +1060,17 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2 input vertices: +<<<<<<< HEAD + 1 Reducer 3 + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE +======= 1 Reducer 4 Statistics: Num rows: 287 Data size: 1093 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -805,6 +1080,7 @@ STAGE PLANS: File Output Operator compressed: false Statistics: Num rows: 287 Data size: 1093 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -815,6 +1091,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab_part +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: 
COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -842,6 +1128,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Map Join Operator condition map: Inner Join 0 to 1 @@ -850,27 +1137,68 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 input vertices: +<<<<<<< HEAD + 1 Map 4 + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), substr(_col2, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE +======= 0 Map 2 Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), substr(_col2, 5) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 +<<<<<<< HEAD + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: tab + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 +======= Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs Reducer 4 +>>>>>>> asf/master Execution mode: llap Reduce Operator Tree: Group By Operator @@ -878,16 +1206,28 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 +<<<<<<< HEAD + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 261 Data size: 994 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 261 Data size: 994 Basic 
stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 261 Data size: 994 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: double) Stage: Stage-0 @@ -924,6 +1264,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -932,6 +1282,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Map Join Operator condition map: Inner Join 0 to 1 @@ -941,6 +1292,16 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 input vertices: 1 Reducer 3 +<<<<<<< HEAD + Statistics: Num rows: 298 Data size: 30694 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 298 Data size: 30694 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 298 Data size: 30694 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) @@ -949,6 +1310,7 @@ STAGE PLANS: File Output Operator compressed: false Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -959,6 +1321,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: x +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -967,6 +1339,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Map Join Operator condition map: Inner Join 0 to 1 @@ -976,22 +1349,38 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 4 +<<<<<<< HEAD + Statistics: Num rows: 500 Data 
size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), substr(_col1, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 253 Data size: 45401 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), substr(_col1, 5) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 253 Data size: 45401 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 +<<<<<<< HEAD + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 253 Data size: 45401 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 253 Data size: 45401 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs @@ -999,6 +1388,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: y +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1007,11 +1406,16 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -1022,16 +1426,28 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 +<<<<<<< HEAD + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 126 Data size: 22610 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 126 Data size: 22610 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 126 Data size: 22610 Basic stats: COMPLETE Column 
stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: double) Stage: Stage-0 @@ -1064,6 +1480,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col4 + input vertices: + 1 Map 2 + 2 Map 3 + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +======= Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1078,12 +1528,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: c +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1092,17 +1553,32 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 
22990 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 43428 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1111,11 +1587,16 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1167,13 +1648,24 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: x +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1182,6 +1674,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 475 Data size: 85013 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Map Join Operator condition map: Inner Join 0 to 1 @@ -1190,6 +1683,16 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: +<<<<<<< HEAD + 1 Map 3 + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) +======= 1 Map 2 Statistics: Num rows: 522 Data size: 93514 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -1213,12 +1716,23 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: y +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 924 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1227,17 
+1741,32 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 230 Data size: 878 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: c +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 242 Data size: 3490 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1246,14 +1775,41 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 230 Data size: 3316 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1314,6 +1870,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b +<<<<<<< HEAD + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) @@ -1322,6 +1888,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 
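The edge rewiring above (Map 1 <- Map 3 (CUSTOM_EDGE) plus the new Reducer 2 <- Map 1, Map 4 (SIMPLE_EDGE) edges) changes the plan's shape, not just its annotations: with column stats, alias c is costed at Data size: 22990 instead of 3490, and the second join is no longer executed as a map join but as a shuffle Merge Join in the new Reducer 2. A plausible reading, though the diff does not state it, is that the larger estimate no longer clears the small-table size gate for join conversion. The sketch below illustrates such a gate with an invented threshold; hive.auto.convert.join.noconditionaltask.size is the real Hive knob that plays this role.

    // Hypothetical size gate behind map-join conversion; the threshold value
    // is invented for illustration.
    public final class MapJoinGate {
        static boolean broadcastable(long smallSideBytes, long thresholdBytes) {
            return smallSideBytes <= thresholdBytes;
        }

        public static void main(String[] args) {
            long threshold = 10_000L;                               // hypothetical setting
            System.out.println(broadcastable(3_490L, threshold));   // true: old estimate, map join
            System.out.println(broadcastable(22_990L, threshold));  // false: new estimate, shuffle join
        }
    }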
- Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) @@ -1339,6 +1916,7 @@ STAGE PLANS: - File Output Operator - compressed: false - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1414,6 +1992,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) @@ -1422,6 +2010,7 @@ STAGE PLANS: - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) @@ -1439,6 +2038,7 @@ STAGE PLANS: - File Output Operator - compressed: false - Statistics: Num rows: 522 Data size: 99066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1491,6 +2091,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats:
COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE - Statistics: Num rows: 242 Data size: 45994 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) @@ -1499,11 +2109,16 @@ STAGE PLANS: - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 230 Data size: 43713 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1511,6 +2126,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 92896 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) @@ -1519,11 +2144,16 @@ STAGE PLANS: - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 88251 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 475 Data size: 88251 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1536,10 +2166,17 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 522 Data size: 97076 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 522 Data size: 97076 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 565 Data size: 105090 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 565 Data size: 105090 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1597,6 +2234,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE - Statistics: Num rows: 242
Data size: 43428 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) @@ -1605,11 +2252,16 @@ STAGE PLANS: - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 230 Data size: 41274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1617,6 +2269,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 94800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) @@ -1631,6 +2314,7 @@ STAGE PLANS: - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 475 Data size: 90060 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1675,8 +2359,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (CUSTOM_EDGE) - Map 3 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1702,6 +2390,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and value is not null) (type: boolean) @@ -1726,12 +2430,29 @@ STAGE PLANS: - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 522 Data size: 1988 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 89488 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) @@ -1761,8 +2482,60 @@ STAGE PLANS: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1168 Data size: 9344 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1794,6 +2567,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 90522 Basic stats: COMPLETE Column stats: PARTIAL - Filter Operator - predicate: key is not null (type: boolean) @@ -1802,11 +2585,16 @@ STAGE PLANS: - expressions: key (type: int), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 230 Data size: 83680 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 230 Data size: 83680 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1814,6 +2602,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 186800 Basic stats: COMPLETE Column stats: PARTIAL - Filter Operator - predicate: key is not null (type: boolean) @@ -1822,26 +2620,43 @@ STAGE PLANS: - expressions: key (type: int), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 475 Data size: 172376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 475 Data size: 172376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 172376 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 237 Data size: 86096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator Target column: ds
(string) Target Input: a Partition key expr: ds - Statistics: Num rows: 237 Data size: 86096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Target Vertex: Map 1 Execution mode: llap LLAP IO: no inputs @@ -1855,6 +2670,16 @@ STAGE PLANS: 0 _col0 (type: int), _col2 (type: string) 1 _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 1150 Data size: 421688 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) @@ -1863,6 +2688,7 @@ STAGE PLANS: - File Output Operator - compressed: false - Statistics: Num rows: 1150 Data size: 421688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1913,6 +2739,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) @@ -1921,6 +2757,7 @@ STAGE PLANS: - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 6855 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1929,16 +2766,33 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 522 Data size: 7540 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 522 Data size: 7540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) @@ -1947,11 +2801,16 @@
STAGE PLANS: - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1961,10 +2820,10 @@ STAGE PLANS: aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2032,6 +2891,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 7216 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) @@ -2040,6 +2909,7 @@ STAGE PLANS: - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 475 Data size: 6855 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2048,16 +2918,33 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 522 Data size: 7540 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 522 Data size: 7540 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) @@ -2066,11 +2953,16 @@ STAGE PLANS: - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num
rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2080,10 +2972,10 @@ STAGE PLANS: aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out index e1373c438a..3f0778c6be 100644 --- ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out @@ -399,7 +399,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -478,7 +479,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -605,6 +606,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false +
MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -638,8 +686,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -765,7 +819,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -844,7 +899,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -950,7 +1005,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -971,6 +1026,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -984,7 +1086,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1004,8 +1106,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out index c254d89dfa..14f1efe792 100644 --- ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out +++ ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out @@ -111,7 +111,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -192,7 +193,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -319,6 +320,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: 
Stage-2 Dependency Collection @@ -352,8 +400,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -483,7 +537,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -564,7 +619,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -670,7 +725,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -691,6 +746,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -704,7 +806,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -724,8 +826,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -872,7 +980,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -953,7 +1062,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1108,7 +1217,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1129,6 +1238,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 61 Data size: 17884 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1142,7 +1298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1162,8 +1318,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out index 167813553f..161631ac23 100644 --- ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out @@ -135,7 +135,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -216,7 +217,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -343,6 +344,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + 
serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -376,8 +424,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -507,7 +561,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -588,7 +643,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -694,7 +749,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -715,6 +770,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -728,7 +830,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -748,8 +850,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out index dd67a55a72..4c27b15c9b 100644 --- ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out +++ ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out @@ -135,7 +135,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -214,7 +215,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -339,6 +340,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 27 Data size: 5170 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -372,8 +420,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -491,7 +545,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -570,7 +625,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -674,7 +729,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -695,6 +750,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 27 Data size: 5170 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + 
MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -708,7 +810,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -728,8 +830,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out index 44f9d68a7e..58ab6ad0c9 100644 --- ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out +++ ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out @@ -36,7 +36,7 @@ PREHOOK: query: insert into table dest1 select * from src2 PREHOOK: type: QUERY PREHOOK: Input: default@src2 PREHOOK: Output: default@dest1 -{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select key k, dest1.value from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -467,20 +467,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 
16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = 
length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"src1.value is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"src2.value2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select * from src1 where length(key) > 2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -530,7 +530,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE @@ -552,7 +552,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: 
Output: default@dest_l1 -{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery1:_u1-subquery1:p1.key = j-subquery1:_u1-subquery1:t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery2:_u1-subquery2:p2.key = j-subquery2:_u1-subquery2:t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery1:_u1-subquery1:p1.key = j-subquery1:_u1-subquery1:t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery2:_u1-subquery2:p2.key = j-subquery2:_u1-subquery2:t2.key)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(key), 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(value, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt -{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = 
p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept.dept_name, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 
16)","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project.project_id, 16)","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project.project_name, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -603,7 +603,7 @@ PREHOOK: Output: default@dest_l2 PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l2 -{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(UDFToByte(tmp_values_col2), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(UDFToInteger(tmp_values_col3), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToLong(tmp_values_col4), 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} PREHOOK: query: select * from ( select c1 + c2 x from dest_l2 union all @@ -623,7 +623,7 @@ PREHOOK: Output: default@dest_l3 PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l3 -{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col2, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col3, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToInteger(tmp_values_col4), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id) from dest_l2 a where a.c2 != 10 diff --git ql/src/test/results/clientpositive/llap/column_access_stats.q.out ql/src/test/results/clientpositive/llap/column_access_stats.q.out index 4cdb486108..0226ba9732 100644 --- ql/src/test/results/clientpositive/llap/column_access_stats.q.out +++ ql/src/test/results/clientpositive/llap/column_access_stats.q.out @@ -406,19 +406,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -431,10 +431,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 374 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 374 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -538,19 +538,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((UDFToDouble(val) = 3.0) and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -564,10 +564,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 374 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 374 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -639,19 +639,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((UDFToDouble(key) = 6.0) and val is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: val (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + 
Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -664,10 +664,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 187 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 187 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -734,19 +734,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -772,19 +772,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t3 - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 860 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), val (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -798,16 +798,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 374 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 374 Basic stats: COMPLETE Column 
stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 1012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 374 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -818,10 +818,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 411 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 411 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/column_table_stats.q.out ql/src/test/results/clientpositive/llap/column_table_stats.q.out index c397f80e71..00094319a7 100644 --- ql/src/test/results/clientpositive/llap/column_table_stats.q.out +++ ql/src/test/results/clientpositive/llap/column_table_stats.q.out @@ -59,7 +59,6 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 depends on stages: Stage-0 - Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-0 @@ -176,11 +175,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-2 - Stats-Aggr Operator - Stats Aggregation Key Prefix: default.s/ - - Stage: Stage-3 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.s/ Column Stats Desc: Columns: key, value Column Types: string, string @@ -318,7 +315,6 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 depends on stages: Stage-0 - Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-0 @@ -494,11 +490,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-2 - Stats-Aggr Operator - Stats Aggregation Key Prefix: default.spart/ - - Stage: Stage-3 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.spart/ Column Stats Desc: Columns: key, value Column Types: string, string @@ -735,7 +729,6 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 depends on stages: Stage-0 - Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-0 @@ -911,11 +904,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-2 - Stats-Aggr Operator - Stats Aggregation Key Prefix: default.spart/ - - Stage: Stage-3 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.spart/ Column Stats Desc: Columns: key, value Column Types: string, string @@ -1152,7 +1143,6 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 depends on stages: Stage-0 - Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-0 @@ -1280,11 +1270,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-2 - Stats-Aggr Operator - Stats Aggregation Key Prefix: default.spart/ - - Stage: Stage-3 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.spart/ Column Stats Desc: Columns: key, value Column Types: string, string diff --git ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out index c7199760ac..9aad1df683 100644 --- 
ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out +++ ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1 rawDataSize 170 @@ -60,7 +60,6 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 depends on stages: Stage-0 - Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-0 @@ -74,22 +73,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Statistics Aggregation Key Prefix: default.s/ GatherStats: true Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value - Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col0 (type: struct), _col1 (type: struct) auto parallelism: false @@ -104,7 +103,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -125,7 +124,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -154,13 +153,13 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -179,10 +178,8 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-3 - Column Stats Work + Stats Work + Basic Stats NoJob Work: Column Stats Desc: Columns: key, value Column Types: string, string @@ -315,7 +312,6 @@ POSTHOOK: type: QUERY STAGE 
DEPENDENCIES: Stage-0 is a root stage Stage-2 depends on stages: Stage-0 - Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-0 @@ -329,13 +325,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: spart - Statistics: Num rows: 2 Data size: 1812 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL Statistics Aggregation Key Prefix: default.spart/ GatherStats: true Select Operator expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 2 Data size: 1812 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 1076 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) @@ -365,7 +361,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 #### A masked pattern was here #### name default.spart @@ -409,7 +405,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 #### A masked pattern was here #### name default.spart @@ -485,10 +481,8 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-3 - Column Stats Work + Stats Work + Basic Stats NoJob Work: Column Stats Desc: Columns: key, value Column Types: string, string @@ -720,7 +714,6 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 depends on stages: Stage-0 - Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-0 @@ -734,13 +727,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: spart - Statistics: Num rows: 1 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 354 Basic stats: COMPLETE Column stats: PARTIAL Statistics Aggregation Key Prefix: default.spart/ GatherStats: true Select Operator expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: key, value, ds - Statistics: Num rows: 1 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 354 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), '11' (type: string) @@ -770,7 +763,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 #### A masked pattern was here #### name default.spart @@ -845,10 +838,8 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-2 - Stats-Aggr Operator - - Stage: Stage-3 - Column Stats Work + Stats Work + Basic Stats NoJob Work: Column Stats Desc: Columns: key, value Column Types: string, string @@ -974,7 +965,7 @@ Database: default Table: spart #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1 rawDataSize 170 diff --git ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out 
ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out index 9368514fd7..4e3786f0b7 100644 --- ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out +++ ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out @@ -98,18 +98,18 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -key int from deserializer +key int 27 484 0 20 HL from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -value string from deserializer +value string 0 20 6.8 7 HL from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -152,18 +152,18 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -key int from deserializer +key int 27 495 0 30 HL from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -value string from deserializer +value string 0 30 6.833333333333333 7 HL from deserializer PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats @@ -210,36 +210,36 @@ PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -key int from deserializer +key int 15 495 0 40 HL from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues 
num_falses comment bitVector -value string from deserializer +value string 0 40 6.825 7 HL from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -key int from deserializer +key int 15 495 0 58 HL from deserializer PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value PREHOOK: type: DESCTABLE PREHOOK: Input: default@partcolstats POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@partcolstats -# col_name data_type comment +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector -value string from deserializer +value string 0 58 6.883333333333334 7 HL from deserializer PREHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@partcolstats diff --git ql/src/test/results/clientpositive/llap/ctas.q.out ql/src/test/results/clientpositive/llap/ctas.q.out index fe492e43ad..23514de58d 100644 --- ql/src/test/results/clientpositive/llap/ctas.q.out +++ ql/src/test/results/clientpositive/llap/ctas.q.out @@ -97,7 +97,8 @@ STAGE PLANS: name: default.nzhang_CTAS1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator @@ -253,7 +254,8 @@ STAGE PLANS: name: default.nzhang_ctas2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator @@ -409,7 +411,8 @@ STAGE PLANS: name: default.nzhang_ctas3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator @@ -630,7 +633,8 @@ STAGE PLANS: name: default.nzhang_ctas4 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator @@ -790,7 +794,8 @@ STAGE PLANS: name: default.nzhang_ctas5 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index ffc39575d1..8e241d9aa6 100644 --- ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -108,7 +108,8 @@ STAGE PLANS: name: default.srcpart_date Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out index db657f8975..5d85a782b4 100644 --- ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out @@ -319,49 +319,49 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic 
stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs @@ -388,10 +388,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -401,15 +401,15 @@ STAGE 
PLANS: keys: 0 1 - Statistics: Num rows: 407313124 Data size: 57586148244 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: llap @@ -418,10 +418,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -442,17 +442,17 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 8 Execution mode: llap @@ -465,25 +465,25 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 20182 Data size: 1416580 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE + Statistics: 
Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
Stage: Stage-0
diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
index fe86a60974..5ae41c5bbd 100644
--- ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
+++ ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
@@ -173,19 +173,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over1k_orc
- Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (t is null or (t = 27)) (type: boolean)
- Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col4 (type: tinyint), _col0 (type: smallint)
sort order: ++
Map-reduce partition columns: _col4 (type: tinyint)
- Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -195,11 +195,11 @@ STAGE PLANS:
Select Operator
expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Dp Sort State: PARTITION_SORTED
- Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -223,7 +223,12 @@ STAGE PLANS:
name: default.over1k_part_orc
Stage: Stage-3
- Stats-Aggr Operator
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: si, i, b, f
+ Column Types: smallint, int, bigint, float
+ Table: default.over1k_part_orc
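The Stats Work stage that replaces the old Stats-Aggr Operator in these plans bundles basic stats aggregation with a Column Stats Desc for the inserted columns. As a rough HiveQL sketch (the SET line only makes the assumed setting explicit; the table and query text are taken from the test above), the insert now gathers column statistics as a side effect that previously required a separate scan:

-- Sketch: with column autogather on, the INSERT itself populates column stats.
SET hive.stats.column.autogather=true;
INSERT OVERWRITE TABLE over1k_part_orc PARTITION (ds='foo', t)
SELECT si, i, b, f, t FROM over1k_orc WHERE t IS NULL OR t = 27;
-- Roughly what would otherwise need an explicit follow-up:
-- ANALYZE TABLE over1k_part_orc PARTITION (ds='foo', t) COMPUTE STATISTICS FOR COLUMNS;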
PREHOOK: query: explain insert overwrite table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10
PREHOOK: type: QUERY
@@ -248,20 +253,20 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: over1k_orc
- Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (t is null or (t = 27)) (type: boolean)
- Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
Execution mode: vectorized, llap
@@ -272,15 +277,15 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col4 (type: tinyint)
sort order: +
Map-reduce partition columns: _col4 (type: tinyint)
- Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
Reducer 3
Execution mode: vectorized, llap
@@ -288,11 +293,11 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Dp Sort State: PARTITION_SORTED
- Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -316,7 +321,12 @@ STAGE PLANS:
name: default.over1k_part_limit_orc
Stage: Stage-3
- Stats-Aggr Operator
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: si, i, b, f
+ Column Types: smallint, int, bigint, float
+ Table: default.over1k_part_limit_orc
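The auto-gathered statistics in these plans are produced by routing the written rows through the compute_stats UDAF, one call per target column. A hand-written equivalent of what the extra reducer evaluates (illustrative only; the source table and columns are the ones named in the plans above, and the 'hll' argument selects HyperLogLog NDV estimation where older plans passed a bit-vector count such as 16):

-- Sketch of the aggregation behind the column-stats branch.
SELECT compute_stats(si, 'hll'),
       compute_stats(i, 'hll'),
       compute_stats(b, 'hll'),
       compute_stats(f, 'hll')
FROM over1k_orc;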
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -316,7 +380,12 @@ STAGE PLANS: name: default.over1k_part_limit_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit_orc PREHOOK: query: explain insert overwrite table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -340,6 +409,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -348,11 +427,16 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -362,11 +446,19 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -389,7 +481,12 @@ STAGE PLANS: name: default.over1k_part_buck_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_orc PREHOOK: query: explain insert overwrite table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ 
-413,6 +510,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -421,11 +528,16 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float) sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -435,11 +547,19 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -462,7 +582,12 @@ STAGE PLANS: name: default.over1k_part_buck_sort_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort_orc PREHOOK: query: insert overwrite table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si PREHOOK: type: QUERY @@ -554,6 +679,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: 
Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -562,11 +697,16 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col4 (type: tinyint), _col0 (type: smallint) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -576,11 +716,19 @@ STAGE PLANS: Select Operator expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -604,7 +752,12 @@ STAGE PLANS: name: default.over1k_part_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_orc PREHOOK: query: explain insert into table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10 PREHOOK: type: QUERY @@ -629,6 +782,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -643,6 +812,7 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master TopN 
Hash Memory Usage: 0.1 value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap @@ -653,15 +823,26 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap @@ -669,11 +850,19 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -697,7 +886,12 @@ STAGE PLANS: name: default.over1k_part_limit_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit_orc PREHOOK: query: explain insert into table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -721,6 +915,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -729,11 +933,16 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: 
bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -743,11 +952,19 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -770,7 +987,12 @@ STAGE PLANS: name: default.over1k_part_buck_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_orc PREHOOK: query: explain insert into table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -794,6 +1016,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -802,11 +1034,16 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float) sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: 
COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -816,11 +1053,19 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -843,7 +1088,12 @@ STAGE PLANS: name: default.over1k_part_buck_sort_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort_orc PREHOOK: query: insert into table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si PREHOOK: type: QUERY @@ -938,7 +1188,7 @@ Database: default Table: over1k_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 32 rawDataSize 640 @@ -980,7 +1230,7 @@ Database: default Table: over1k_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 6 rawDataSize 120 @@ -1022,7 +1272,7 @@ Database: default Table: over1k_part_limit_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 14 rawDataSize 280 @@ -1064,7 +1314,7 @@ Database: default Table: over1k_part_limit_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 6 rawDataSize 120 @@ -1105,7 +1355,7 @@ Database: default Table: over1k_part_buck_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 32 rawDataSize 640 @@ -1146,7 +1396,7 @@ Database: default Table: over1k_part_buck_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 4 rawDataSize 80 @@ -1187,7 +1437,7 @@ Database: default Table: over1k_part_buck_sort_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 32 rawDataSize 640 @@ -1228,7 +1478,7 @@ Database: default Table: over1k_part_buck_sort_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 4 rawDataSize 80 @@ -1331,12 +1581,27 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -1349,24 +1614,68 @@ STAGE PLANS: key expressions: _col1 (type: int) sort order: + Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2_orc + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 18 Data size: 1998 Basic 
stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1385,7 +1694,12 @@ STAGE PLANS: name: default.over1k_part2_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY @@ -1409,6 +1723,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -1417,11 +1741,16 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col4 (type: tinyint), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col4 (type: tinyint) +<<<<<<< HEAD + Statistics: Num 
rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1431,11 +1760,19 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1459,7 +1796,12 @@ STAGE PLANS: name: default.over1k_part2_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27 PREHOOK: type: QUERY @@ -1484,6 +1826,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) @@ -1493,6 +1846,7 @@ STAGE PLANS: key expressions: _col2 (type: int) sort order: + Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float) Execution mode: vectorized, llap @@ -1503,6 +1857,19 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float) outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 is null or (_col0 = 27)) (type: boolean) + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + Select 
Operator + expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 @@ -1514,11 +1881,16 @@ STAGE PLANS: expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col4 (type: tinyint) sort order: + Map-reduce partition columns: _col4 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float) Reducer 3 Execution mode: vectorized, llap @@ -1526,11 +1898,19 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED Statistics: Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1554,7 +1934,12 @@ STAGE PLANS: name: default.over1k_part2_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY @@ -1572,35 +1957,61 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Group By Operator keys: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 9 Data 
size: 216 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float) sort order: +++++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float) +<<<<<<< HEAD + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) @@ -1609,11 +2020,48 @@ STAGE PLANS: File Output Operator compressed: false Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2_orc + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 9 Data size: 999 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num 
rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1632,7 +2080,12 @@ STAGE PLANS: name: default.over1k_part2_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY @@ -1656,20 +2109,35 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Group By Operator keys: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float) sort order: +++++ Map-reduce partition columns: _col0 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1679,6 +2147,17 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_SORTED + Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint) @@ -1688,6 +2167,7 @@ STAGE PLANS: compressed: false Dp Sort State: PARTITION_SORTED Statistics: Num rows: 28 Data size: 639 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1711,7 +2191,12 @@ STAGE PLANS: name: default.over1k_part2_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2_orc PREHOOK: query: insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i PREHOOK: type: QUERY @@ -1755,7 +2240,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1797,7 +2282,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -1900,7 +2385,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1942,7 +2427,7 @@ Database: default Table: over1k_part2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2041,12 +2526,23 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -2055,29 +2551,77 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col3 
(type: float) sort order: + Map-reduce partition columns: _col0 (type: smallint) +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col3 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part_buck_sort2_orc + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 2 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 3848 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 3848 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 3848 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2095,7 +2639,12 @@ STAGE PLANS: name: default.over1k_part_buck_sort2_orc Stage: Stage-3 - Stats-Aggr Operator + Stats 
Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2_orc PREHOOK: query: explain insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -2119,6 +2668,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1k_orc +<<<<<<< HEAD + Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (t is null or (t = 27)) (type: boolean) + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1049 Data size: 23952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (t is null or (t = 27)) (type: boolean) @@ -2127,11 +2686,16 @@ STAGE PLANS: expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master Reduce Output Operator key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float) sort order: +++ Map-reduce partition columns: _col4 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2141,11 +2705,19 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' +<<<<<<< HEAD + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 57 Data size: 1301 Basic stats: COMPLETE Column stats: NONE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2168,7 +2740,12 @@ STAGE PLANS: name: default.over1k_part_buck_sort2_orc Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2_orc PREHOOK: query: insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY @@ -2211,7 +2788,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2252,7 +2829,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2429,7 +3006,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2470,7 +3047,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 2 rawDataSize 52 diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index 36898ceed9..ce41ddfac9 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -180,7 +180,12 @@ STAGE PLANS: name: default.over1k_part Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part PREHOOK: query: explain insert overwrite table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 PREHOOK: type: QUERY @@ -273,7 +278,12 @@ STAGE PLANS: name: default.over1k_part_limit Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit PREHOOK: query: explain insert overwrite table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -346,7 +356,12 @@ STAGE PLANS: name: default.over1k_part_buck Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck PREHOOK: query: explain insert overwrite table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -419,7 +434,12 @@ STAGE PLANS: name: default.over1k_part_buck_sort Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort PREHOOK: query: insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -561,7 +581,12 @@ STAGE PLANS: name: default.over1k_part Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part PREHOOK: query: explain insert into table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t 
from over1k where t is null or t=27 limit 10 PREHOOK: type: QUERY @@ -654,7 +679,12 @@ STAGE PLANS: name: default.over1k_part_limit Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit PREHOOK: query: explain insert into table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -727,7 +757,12 @@ STAGE PLANS: name: default.over1k_part_buck Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck PREHOOK: query: explain insert into table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -800,7 +835,12 @@ STAGE PLANS: name: default.over1k_part_buck_sort Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort PREHOOK: query: insert into table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -895,7 +935,7 @@ Database: default Table: over1k_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 32 rawDataSize 830 @@ -937,7 +977,7 @@ Database: default Table: over1k_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 6 rawDataSize 156 @@ -979,7 +1019,7 @@ Database: default Table: over1k_part_limit #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 14 rawDataSize 362 @@ -1021,7 +1061,7 @@ Database: default Table: over1k_part_limit #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 2 numRows 6 rawDataSize 156 @@ -1062,7 +1102,7 @@ Database: default Table: over1k_part_buck #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 8 numRows 32 rawDataSize 830 @@ -1103,7 +1143,7 @@ Database: default Table: over1k_part_buck #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 4 numRows 6 rawDataSize 156 @@ -1144,7 +1184,7 @@ Database: default Table: over1k_part_buck_sort #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 8 numRows 32 rawDataSize 830 @@ -1185,7 +1225,7 @@ Database: default Table: over1k_part_buck_sort #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 4 numRows 6 rawDataSize 156 @@ -1288,6 +1328,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1324,6 +1365,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1342,7 +1419,12 @@ STAGE PLANS: name: default.over1k_part2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY @@ -1416,7 +1498,12 @@ STAGE PLANS: name: default.over1k_part2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic 
Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 PREHOOK: type: QUERY @@ -1511,7 +1598,12 @@ STAGE PLANS: name: default.over1k_part2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY @@ -1529,6 +1621,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1571,6 +1664,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1110 Data size: 26640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1110 Data size: 26640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1110 Data size: 26640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1589,7 +1718,12 @@ STAGE PLANS: name: default.over1k_part2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: explain 
insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY @@ -1668,7 +1802,12 @@ STAGE PLANS: name: default.over1k_part2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY @@ -1712,7 +1851,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1754,7 +1893,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -1857,7 +1996,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1899,7 +2038,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -1998,6 +2137,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2035,6 +2175,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part_buck_sort2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: 
Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2052,7 +2228,12 @@ STAGE PLANS: name: default.over1k_part_buck_sort2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2 PREHOOK: query: explain insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -2125,7 +2306,12 @@ STAGE PLANS: name: default.over1k_part_buck_sort2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2 PREHOOK: query: insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY @@ -2168,7 +2354,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2209,7 +2395,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2311,7 +2497,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2352,7 +2538,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2502,7 +2688,12 @@ STAGE PLANS: name: default.over1k_part3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 PREHOOK: type: QUERY @@ -2577,7 +2768,12 @@ STAGE PLANS: name: default.over1k_part3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: 
smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 PREHOOK: type: QUERY @@ -2652,7 +2848,12 @@ STAGE PLANS: name: default.over1k_part3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 PREHOOK: type: QUERY @@ -2727,7 +2928,12 @@ STAGE PLANS: name: default.over1k_part3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and s="foo" PREHOOK: type: QUERY @@ -2802,7 +3008,12 @@ STAGE PLANS: name: default.over1k_part3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 and s="foo" PREHOOK: type: QUERY @@ -2877,7 +3088,12 @@ STAGE PLANS: name: default.over1k_part3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 and s="foo" PREHOOK: type: QUERY @@ -2893,6 +3109,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -2914,8 +3133,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part3 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 107 Data size: 13282 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 107 Data size: 13282 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 107 Data size: 13282 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5 + Statistics: Num rows: 53 Data size: 6578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 53 Data size: 6578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 53 Data size: 6578 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2935,7 +3190,12 @@ STAGE PLANS: name: default.over1k_part3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 PREHOOK: query: insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where s="foo" PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index 6488e9ee60..fb653efa03 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -140,7 +140,8 @@ STAGE PLANS: name: default.acid_part Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_part set value = 'bar' where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY @@ -234,7 +235,8 @@ STAGE PLANS: name: default.acid_part Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_part set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY @@ -420,7 +422,8 @@ STAGE PLANS: name: default.acid_part_sdpo Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY @@ -514,7 +517,8 @@ STAGE PLANS: name: default.acid_part_sdpo Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_part_sdpo set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY @@ -710,7 +714,8 @@ STAGE PLANS: name: default.acid_2l_part Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY @@ -805,7 +810,8 @@ STAGE PLANS: name: default.acid_2l_part Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY @@ -929,7 +935,8 @@ STAGE PLANS: name: default.acid_2l_part Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: delete from acid_2L_part where value = 'bar' PREHOOK: type: QUERY @@ -1122,7 +1129,8 @@ STAGE PLANS: name: default.acid_2l_part_sdpo Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY @@ -1217,7 +1225,8 @@ STAGE PLANS: name: 
default.acid_2l_part_sdpo Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY @@ -1341,7 +1350,8 @@ STAGE PLANS: name: default.acid_2l_part_sdpo Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: delete from acid_2L_part_sdpo where value = 'bar' PREHOOK: type: QUERY @@ -1536,7 +1546,8 @@ STAGE PLANS: name: default.acid_2l_part_sdpo_no_cp Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY @@ -1632,7 +1643,8 @@ STAGE PLANS: name: default.acid_2l_part_sdpo_no_cp Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/explainuser_2.q.out ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 4693c2b156..27949efaf6 100644 --- ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -2590,261 +2590,386 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 10 <- Union 11 (CONTAINS) -Map 12 <- Union 11 (CONTAINS) -Map 13 <- Union 11 (CONTAINS) -Map 16 <- Map 6 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 17 <- Map 6 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 18 <- Map 6 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 19 <- Map 6 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) -Map 6 <- Map 15 (BROADCAST_EDGE) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 11 (SIMPLE_EDGE) -Reducer 9 <- Map 14 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE), Union 4 (CONTAINS) +Map 1 <- Map 9 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 13 <- Union 14 (CONTAINS) +Map 15 <- Union 14 (CONTAINS) +Map 16 <- Union 14 (CONTAINS) +Map 19 <- Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 20 <- Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 21 <- Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 22 <- Map 9 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 8 <- Map 9 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 9 <- Map 18 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 14 (SIMPLE_EDGE) +Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 4 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 4 (CUSTOM_SIMPLE_EDGE) Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.a"} Stage-4 Dependency Collection{} Stage-3 - Union 4 - <-Map 16 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=1677 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_124] (rows=1677 width=10) - Conds:RS_69._col1=SEL_54._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_69] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_123] (rows=27 width=7) - Conds:SEL_10._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 15 [BROADCAST_EDGE] llap - BROADCAST [RS_67] + Reducer 
5 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Union 4 [CUSTOM_SIMPLE_EDGE] + <-Map 19 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=1677 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_124] (rows=1677 width=10) + Conds:RS_69._col1=SEL_54._col0(Inner),Output:["_col0","_col3"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_69] + PartitionCols:_col1 + Map Join Operator [MAPJOIN_123] (rows=27 width=7) + Conds:SEL_10._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 18 [BROADCAST_EDGE] llap + BROADCAST [RS_67] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_114] (rows=25 width=7) + predicate:key is not null + TableScan [TS_49] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_10] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_106] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_8] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_54] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_115] (rows=25 width=7) + predicate:value is not null + TableScan [TS_52] (rows=25 width=7) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + <-Map 20 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=1677 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_124] (rows=1677 width=10) + Conds:RS_129._col1=SEL_57._col0(Inner),Output:["_col0","_col3"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_129] + PartitionCols:_col1 + Please refer to the previous Map Join Operator [MAPJOIN_123] + <-Select Operator [SEL_57] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_116] (rows=500 width=10) + predicate:value is not null + TableScan [TS_55] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] 
+ File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + <-Map 21 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=1677 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_124] (rows=1677 width=10) + Conds:RS_130._col1=SEL_61._col0(Inner),Output:["_col0","_col3"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_130] + PartitionCols:_col1 + Please refer to the previous Map Join Operator [MAPJOIN_123] + <-Select Operator [SEL_61] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_117] (rows=500 width=10) + predicate:value is not null + TableScan [TS_59] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + <-Map 22 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=1677 width=10) + Output:["_col0","_col1"] + Map Join Operator [MAPJOIN_124] (rows=1677 width=10) + Conds:RS_131._col1=SEL_64._col0(Inner),Output:["_col0","_col3"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_131] + PartitionCols:_col1 + Please refer to the previous Map Join Operator [MAPJOIN_123] + <-Select Operator [SEL_64] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_118] (rows=500 width=10) + predicate:value is not null + TableScan [TS_62] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator 
[FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + <-Reducer 12 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_44] (rows=1239 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_122] (rows=1239 width=10) + Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col1","_col4"] + <-Map 17 [SIMPLE_EDGE] llap + SHUFFLE [RS_42] PartitionCols:_col0 - Select Operator [SEL_51] (rows=25 width=7) + Select Operator [SEL_37] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_114] (rows=25 width=7) + Filter Operator [FIL_112] (rows=500 width=10) predicate:key is not null - TableScan [TS_49] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_10] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_106] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_8] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_54] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_115] (rows=25 width=7) - predicate:value is not null - TableScan [TS_52] (rows=25 width=7) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] - <-Map 17 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=1677 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_124] (rows=1677 width=10) - Conds:RS_129._col1=SEL_57._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_129] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_123] - <-Select Operator [SEL_57] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_116] (rows=500 width=10) - predicate:value is not null - TableScan [TS_55] (rows=500 width=10) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] - <-Map 18 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=1677 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_124] (rows=1677 width=10) - Conds:RS_130._col1=SEL_61._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_130] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_123] - <-Select Operator [SEL_61] (rows=500 width=10) - 
Output:["_col0"] - Filter Operator [FIL_117] (rows=500 width=10) - predicate:value is not null - TableScan [TS_59] (rows=500 width=10) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] - <-Map 19 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_72] (rows=1677 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_124] (rows=1677 width=10) - Conds:RS_131._col1=SEL_64._col0(Inner),Output:["_col0","_col3"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_131] - PartitionCols:_col1 - Please refer to the previous Map Join Operator [MAPJOIN_123] - <-Select Operator [SEL_64] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_118] (rows=500 width=10) - predicate:value is not null - TableScan [TS_62] (rows=500 width=10) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] - <-Reducer 3 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_20] (rows=634 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_120] (rows=634 width=10) - Conds:Union 2._col1=RS_18._col0(Inner),Output:["_col1","_col4"] - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_13] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_107] (rows=500 width=10) - predicate:key is not null - TableScan [TS_11] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] llap - Reduce Output Operator [RS_17] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_119] (rows=577 width=10) - Conds:SEL_2._col0=RS_15._col1(Inner),Output:["_col1"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_15] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_10] - <-Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_104] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["value"] - <-Map 5 [CONTAINS] llap - Reduce Output Operator [RS_17] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_119] (rows=577 width=10) - Conds:SEL_5._col0=RS_125._col1(Inner),Output:["_col1"] - <-Map 6 [BROADCAST_EDGE] llap - BROADCAST [RS_125] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_10] - <-Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_105] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_20] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_20] - <-Reducer 9 [CONTAINS] llap - File Output Operator [FS_75] - table:{"name:":"default.a"} - Select Operator [SEL_44] (rows=1239 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_122] (rows=1239 width=10) - Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col1","_col4"] - <-Map 14 [SIMPLE_EDGE] llap - SHUFFLE [RS_42] - PartitionCols:_col0 - Select Operator [SEL_37] 
(rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_112] (rows=500 width=10) - predicate:key is not null - TableScan [TS_35] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_41] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_121] (rows=1127 width=10) - Conds:Union 11._col0=RS_39._col1(Inner),Output:["_col1"] - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_39] + TableScan [TS_35] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_41] PartitionCols:_col1 - Select Operator [SEL_34] (rows=500 width=10) + Merge Join Operator [MERGEJOIN_121] (rows=1127 width=10) + Conds:Union 14._col0=RS_39._col1(Inner),Output:["_col1"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_39] + PartitionCols:_col1 + Select Operator [SEL_34] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_111] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_11] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Union 14 [SIMPLE_EDGE] + <-Map 13 [CONTAINS] llap + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_108] (rows=25 width=7) + predicate:value is not null + TableScan [TS_21] (rows=25 width=7) + Output:["value"] + <-Map 15 [CONTAINS] llap + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_109] (rows=500 width=10) + predicate:value is not null + TableScan [TS_24] (rows=500 width=10) + Output:["value"] + <-Map 16 [CONTAINS] llap + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_30] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_110] (rows=500 width=10) + predicate:value is not null + TableScan [TS_28] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_44] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_44] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + <-Reducer 3 [CONTAINS] llap + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_20] (rows=634 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_120] (rows=634 width=10) + Conds:Union 2._col1=RS_18._col0(Inner),Output:["_col1","_col4"] + <-Map 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_13] (rows=500 
width=10) Output:["_col0","_col1"] - Filter Operator [FIL_111] (rows=500 width=10) - predicate:(key is not null and value is not null) + Filter Operator [FIL_107] (rows=500 width=10) + predicate:key is not null Please refer to the previous TableScan [TS_11] - <-Union 11 [SIMPLE_EDGE] - <-Map 10 [CONTAINS] llap - Reduce Output Operator [RS_38] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=25 width=7) - Output:["_col0"] - Filter Operator [FIL_108] (rows=25 width=7) - predicate:value is not null - TableScan [TS_21] (rows=25 width=7) - Output:["value"] - <-Map 12 [CONTAINS] llap - Reduce Output Operator [RS_38] - PartitionCols:_col0 - Select Operator [SEL_26] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_109] (rows=500 width=10) - predicate:value is not null - TableScan [TS_24] (rows=500 width=10) - Output:["value"] - <-Map 13 [CONTAINS] llap - Reduce Output Operator [RS_38] - PartitionCols:_col0 - Select Operator [SEL_30] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_110] (rows=500 width=10) - predicate:value is not null - TableScan [TS_28] (rows=500 width=10) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_44] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_44] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_17] + PartitionCols:_col1 + Map Join Operator [MAPJOIN_119] (rows=577 width=10) + Conds:SEL_2._col0=RS_15._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_15] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_10] + <-Select Operator [SEL_2] (rows=25 width=7) + Output:["_col0"] + Filter Operator [FIL_104] (rows=25 width=7) + predicate:value is not null + TableScan [TS_0] (rows=25 width=7) + Output:["value"] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_17] + PartitionCols:_col1 + Map Join Operator [MAPJOIN_119] (rows=577 width=10) + Conds:SEL_5._col0=RS_125._col1(Inner),Output:["_col1"] + <-Map 9 [BROADCAST_EDGE] llap + BROADCAST [RS_125] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_10] + <-Select Operator [SEL_5] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_105] (rows=500 width=10) + predicate:value is not null + TableScan [TS_3] (rows=500 width=10) + Output:["value"] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=3550 width=10) + Output:["key","value"] + Please refer to the previous 
Select Operator [SEL_20] + Reducer 6 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <- Please refer to the previous Union 4 [CUSTOM_SIMPLE_EDGE] Stage-6 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.b"} Please refer to the previous Stage-4 Stage-7 - Stats-Aggr Operator + Stats Work{} Stage-2 Move Operator table:{"name:":"default.c"} @@ -2886,297 +3011,323 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 10 <- Map 22 (BROADCAST_EDGE) -Map 14 <- Union 15 (CONTAINS) -Map 19 <- Union 15 (CONTAINS) -Map 20 <- Union 17 (CONTAINS) -Map 23 <- Union 24 (CONTAINS) -Map 30 <- Union 24 (CONTAINS) -Map 31 <- Union 26 (CONTAINS) -Map 32 <- Union 28 (CONTAINS) -Map 9 <- Union 2 (CONTAINS) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 13 <- Map 21 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 16 <- Union 15 (SIMPLE_EDGE), Union 17 (CONTAINS) -Reducer 18 <- Union 17 (SIMPLE_EDGE) -Reducer 25 <- Union 24 (SIMPLE_EDGE), Union 26 (CONTAINS) -Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 29 <- Map 10 (BROADCAST_EDGE), Union 28 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 3 <- Map 10 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 12 <- Union 2 (CONTAINS) +Map 13 <- Map 25 (BROADCAST_EDGE) +Map 17 <- Union 18 (CONTAINS) +Map 22 <- Union 18 (CONTAINS) +Map 23 <- Union 20 (CONTAINS) +Map 26 <- Union 27 (CONTAINS) +Map 33 <- Union 27 (CONTAINS) +Map 34 <- Union 29 (CONTAINS) +Map 35 <- Union 31 (CONTAINS) +Reducer 10 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 16 <- Map 24 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 19 <- Union 18 (SIMPLE_EDGE), Union 20 (CONTAINS) +Reducer 21 <- Union 20 (SIMPLE_EDGE) +Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) +Reducer 3 <- Map 13 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 32 <- Map 13 (BROADCAST_EDGE), Union 31 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.a"} Stage-4 Dependency Collection{} Stage-3 - Reducer 8 llap - File Output Operator [FS_123] - table:{"name:":"default.a"} - Group By Operator [GBY_120] (rows=530 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 29 [CONTAINS] llap - Reduce Output Operator [RS_119] - PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1061 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_114] (rows=484 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_172] (rows=484 width=10) - 
Conds:RS_111._col1=SEL_107._col1(Inner),Output:["_col0","_col3"] - <-Map 10 [BROADCAST_EDGE] llap - BROADCAST [RS_111] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_171] (rows=27 width=7) - Conds:SEL_15._col0=RS_109._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 22 [BROADCAST_EDGE] llap - BROADCAST [RS_109] - PartitionCols:_col0 - Select Operator [SEL_74] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_162] (rows=25 width=7) - predicate:key is not null - TableScan [TS_72] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_15] (rows=25 width=7) + Reducer 10 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=530 width=10) + Output:["key","value"] + Group By Operator [GBY_120] (rows=530 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 7 [SIMPLE_EDGE] + <-Reducer 32 [CONTAINS] llap + Reduce Output Operator [RS_119] + PartitionCols:_col0, _col1 + Group By Operator [GBY_118] (rows=1061 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_114] (rows=484 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_13] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_107] (rows=440 width=10) - Output:["_col1"] - Group By Operator [GBY_106] (rows=440 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 28 [SIMPLE_EDGE] - <-Map 32 [CONTAINS] llap - Reduce Output Operator [RS_105] - PartitionCols:_col0, _col1 - Group By Operator [GBY_104] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_100] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_166] (rows=500 width=10) - predicate:value is not null - TableScan [TS_98] (rows=500 width=10) - Output:["key","value"] - <-Reducer 27 [CONTAINS] llap - Reduce Output Operator [RS_105] - PartitionCols:_col0, _col1 - Group By Operator [GBY_104] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_97] (rows=381 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_96] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 26 [SIMPLE_EDGE] - <-Map 31 [CONTAINS] llap - Reduce Output Operator [RS_95] - PartitionCols:_col0, _col1 - Group By Operator [GBY_94] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_90] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_165] (rows=500 width=10) - predicate:value is not null - TableScan [TS_88] (rows=500 width=10) - Output:["key","value"] - <-Reducer 25 [CONTAINS] llap - Reduce Output Operator [RS_95] - PartitionCols:_col0, _col1 - Group By Operator [GBY_94] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_87] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_86] (rows=262 width=10) + Map Join Operator [MAPJOIN_172] (rows=484 width=10) + Conds:RS_111._col1=SEL_107._col1(Inner),Output:["_col0","_col3"] + <-Map 13 
[BROADCAST_EDGE] llap + BROADCAST [RS_111] + PartitionCols:_col1 + Map Join Operator [MAPJOIN_171] (rows=27 width=7) + Conds:SEL_15._col0=RS_109._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 25 [BROADCAST_EDGE] llap + BROADCAST [RS_109] + PartitionCols:_col0 + Select Operator [SEL_74] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_162] (rows=25 width=7) + predicate:key is not null + TableScan [TS_72] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_15] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_154] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_13] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_107] (rows=440 width=10) + Output:["_col1"] + Group By Operator [GBY_106] (rows=440 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 31 [SIMPLE_EDGE] + <-Map 35 [CONTAINS] llap + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_100] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_166] (rows=500 width=10) + predicate:value is not null + TableScan [TS_98] (rows=500 width=10) + Output:["key","value"] + <-Reducer 30 [CONTAINS] llap + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_97] (rows=381 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_96] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 29 [SIMPLE_EDGE] + <-Map 34 [CONTAINS] llap + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_90] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_165] (rows=500 width=10) + predicate:value is not null + TableScan [TS_88] (rows=500 width=10) + Output:["key","value"] + <-Reducer 28 [CONTAINS] llap + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_87] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_86] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 27 [SIMPLE_EDGE] + <-Map 26 [CONTAINS] llap + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_77] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_163] (rows=25 width=7) + predicate:value is not null + TableScan [TS_75] (rows=25 width=7) + Output:["key","value"] + <-Map 33 [CONTAINS] llap + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_80] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_164] (rows=500 width=10) + predicate:value is not null + TableScan [TS_78] (rows=500 width=10) + Output:["key","value"] + <-Reducer 6 [CONTAINS] llap + Reduce Output Operator [RS_119] + PartitionCols:_col0, _col1 + Group By Operator [GBY_118] (rows=1061 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + 
Group By Operator [GBY_67] (rows=577 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 16 [CONTAINS] llap + Reduce Output Operator [RS_66] + PartitionCols:_col0, _col1 + Group By Operator [GBY_65] (rows=1155 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_61] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_170] (rows=605 width=10) + Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] + <-Map 24 [SIMPLE_EDGE] llap + SHUFFLE [RS_59] + PartitionCols:_col0 + Select Operator [SEL_54] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_160] (rows=500 width=10) + predicate:key is not null + TableScan [TS_52] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 15 [SIMPLE_EDGE] llap + SHUFFLE [RS_58] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_169] (rows=550 width=10) + Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] + <-Map 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_56] + PartitionCols:_col1 + Select Operator [SEL_51] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_159] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_16] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 21 [SIMPLE_EDGE] llap + SHUFFLE [RS_55] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=381 width=10) + Output:["_col1"] + Group By Operator [GBY_47] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 20 [SIMPLE_EDGE] + <-Map 23 [CONTAINS] llap + Reduce Output Operator [RS_46] + PartitionCols:_col0, _col1 + Group By Operator [GBY_45] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_41] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_158] (rows=500 width=10) + predicate:value is not null + TableScan [TS_39] (rows=500 width=10) + Output:["key","value"] + <-Reducer 19 [CONTAINS] llap + Reduce Output Operator [RS_46] + PartitionCols:_col0, _col1 + Group By Operator [GBY_45] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_38] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_37] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 18 [SIMPLE_EDGE] + <-Map 17 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_28] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=25 width=7) + predicate:value is not null + TableScan [TS_26] (rows=25 width=7) + Output:["key","value"] + <-Map 22 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_31] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=500 width=10) + predicate:value is not null + TableScan [TS_29] (rows=500 width=10) + Output:["key","value"] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_66] + PartitionCols:_col0, _col1 + Group By Operator [GBY_65] (rows=1155 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_25] (rows=550 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_168] (rows=550 width=10) + 
Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] + <-Map 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col0 + Select Operator [SEL_18] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=500 width=10) + predicate:key is not null + Please refer to the previous TableScan [TS_16] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col2 + Map Join Operator [MAPJOIN_167] (rows=288 width=10) + Conds:SEL_12._col1=RS_20._col1(Inner),Output:["_col2"] + <-Map 13 [BROADCAST_EDGE] llap + BROADCAST [RS_20] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_15] + <-Select Operator [SEL_12] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_11] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 24 [SIMPLE_EDGE] - <-Map 23 [CONTAINS] llap - Reduce Output Operator [RS_85] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_10] PartitionCols:_col0, _col1 - Group By Operator [GBY_84] (rows=525 width=10) + Group By Operator [GBY_9] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_77] (rows=25 width=7) + Select Operator [SEL_2] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_163] (rows=25 width=7) + Filter Operator [FIL_152] (rows=25 width=7) predicate:value is not null - TableScan [TS_75] (rows=25 width=7) + TableScan [TS_0] (rows=25 width=7) Output:["key","value"] - <-Map 30 [CONTAINS] llap - Reduce Output Operator [RS_85] + <-Map 12 [CONTAINS] llap + Reduce Output Operator [RS_10] PartitionCols:_col0, _col1 - Group By Operator [GBY_84] (rows=525 width=10) + Group By Operator [GBY_9] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_80] (rows=500 width=10) + Select Operator [SEL_5] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=500 width=10) + Filter Operator [FIL_153] (rows=500 width=10) predicate:value is not null - TableScan [TS_78] (rows=500 width=10) + TableScan [TS_3] (rows=500 width=10) Output:["key","value"] - <-Reducer 6 [CONTAINS] llap - Reduce Output Operator [RS_119] - PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1061 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_67] (rows=577 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 13 [CONTAINS] llap - Reduce Output Operator [RS_66] - PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1155 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_61] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_170] (rows=605 width=10) - Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] - <-Map 21 [SIMPLE_EDGE] llap - SHUFFLE [RS_59] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=500 width=10) - predicate:key is not null - TableScan [TS_52] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 12 [SIMPLE_EDGE] llap - SHUFFLE [RS_58] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_169] (rows=550 width=10) - Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] - <-Map 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_56] - PartitionCols:_col1 - Select Operator [SEL_51] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=500 width=10) - predicate:(key is not null and 
value is not null) - TableScan [TS_16] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 18 [SIMPLE_EDGE] llap - SHUFFLE [RS_55] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=381 width=10) - Output:["_col1"] - Group By Operator [GBY_47] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 17 [SIMPLE_EDGE] - <-Map 20 [CONTAINS] llap - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_41] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=500 width=10) - predicate:value is not null - TableScan [TS_39] (rows=500 width=10) - Output:["key","value"] - <-Reducer 16 [CONTAINS] llap - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_38] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 15 [SIMPLE_EDGE] - <-Map 14 [CONTAINS] llap - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_28] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=25 width=7) - predicate:value is not null - TableScan [TS_26] (rows=25 width=7) - Output:["key","value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_31] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=500 width=10) - predicate:value is not null - TableScan [TS_29] (rows=500 width=10) - Output:["key","value"] - <-Reducer 4 [CONTAINS] llap - Reduce Output Operator [RS_66] - PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1155 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=550 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_168] (rows=550 width=10) - Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] - <-Map 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=500 width=10) - predicate:key is not null - Please refer to the previous TableScan [TS_16] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - PartitionCols:_col2 - Map Join Operator [MAPJOIN_167] (rows=288 width=10) - Conds:SEL_12._col1=RS_20._col1(Inner),Output:["_col2"] - <-Map 10 [BROADCAST_EDGE] llap - BROADCAST [RS_20] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_15] - <-Select Operator [SEL_12] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_11] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] llap - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_152] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["key","value"] - <-Map 9 [CONTAINS] llap - 
Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_153] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["key","value"] - File Output Operator [FS_125] - table:{"name:":"default.b"} - Please refer to the previous Group By Operator [GBY_120] - File Output Operator [FS_127] - table:{"name:":"default.c"} - Please refer to the previous Group By Operator [GBY_120] + Reducer 11 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=530 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] + Reducer 9 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=530 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] Stage-6 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.b"} Please refer to the previous Stage-4 Stage-7 - Stats-Aggr Operator + Stats Work{} Stage-2 Move Operator table:{"name:":"default.c"} @@ -3215,68 +3366,88 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
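The rewritten plans above all follow one pattern: each INSERT target gains a column-stats branch — a Select over the written columns feeding compute_stats in hash mode, then a merge in a dedicated reducer — and the old Stats-Aggr Operator stage is replaced by Stats Work. A minimal way to observe the new plan shape, as a sketch only (it assumes the populated default.src and default.dest1 tables these tests use; exact operator numbering and statistics will differ by version and engine):

-- Enable column-stats autogather for the session, then EXPLAIN an insert:
-- the plan should show an extra reducer aggregating compute_stats(...)
-- and a "Stats Work" stage where "Stats-Aggr Operator" used to appear.
SET hive.stats.column.autogather=true;
EXPLAIN
INSERT OVERWRITE TABLE dest1
SELECT key, value FROM src;
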
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Stage-4 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.dest1"} Stage-3 Dependency Collection{} Stage-2 - Reducer 5 llap - File Output Operator [FS_21] - table:{"name:":"default.dest1"} - Select Operator [SEL_19] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_18] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Group By Operator [GBY_13] (rows=1 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_12] - PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_7] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500 width=10) - Output:["key","value"] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_12] - PartitionCols:_col0, _col1 - Group By Operator [GBY_11] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - File Output Operator [FS_28] - table:{"name:":"default.dest2"} - Select Operator [SEL_26] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_25] (rows=1 width=464) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_13] + Reducer 6 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_21] + table:{"name:":"default.dest1"} + Select Operator [SEL_19] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_18] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Group By Operator [GBY_13] (rows=1 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_7] (rows=500 width=10) + 
Output:["_col0","_col1"] + TableScan [TS_6] (rows=500 width=10) + Output:["key","value"] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_12] + PartitionCols:_col0, _col1 + Group By Operator [GBY_11] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_5] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_19] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1476) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 16)","compute_stats(val1, 16)","compute_stats(val2, 16)"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Select Operator [SEL_26] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_25] (rows=1 width=464) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_13] Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.dest2"} @@ -3388,87 +3559,109 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
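In each of these plans the stats computation is two-phase: a partial compute_stats aggregate runs next to the writer (GBY_2), is shuffled over a CUSTOM_SIMPLE_EDGE, and a final merge (GBY_4) feeds the Stats Work stage. Conceptually the added branch evaluates something close to the following per target table, shown here as a standalone sketch rather than the patch's actual mechanism (the real branch runs inside the insert's DAG; the second argument selects the NDV estimator — a bit-vector count such as 16 in these plans, 'hll' elsewhere in this patch):

-- Roughly what the stats branch gathers for default.dest1 after the insert;
-- compute_stats returns a struct (column type, max length/value, estimated
-- NDV, null count, ...) that the Stats Work stage persists to the metastore.
SELECT compute_stats(key, 16),
       compute_stats(value, 16)
FROM dest1;
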
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) -Map 7 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-4 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.dest1"} Stage-3 Dependency Collection{} Stage-2 - Reducer 4 llap - File Output Operator [FS_18] - table:{"name:":"default.dest1"} - Select Operator [SEL_16] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_13] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_7] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_21] - PartitionCols:_col0, _col1 - Group By Operator [GBY_20] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_7] - <-Map 7 [CONTAINS] llap - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_13] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_10] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_9] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_21] - PartitionCols:_col0, _col1 - Group By Operator [GBY_20] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_10] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Group By Operator [GBY_13] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_5] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_21] - PartitionCols:_col0, _col1 - Group By Operator [GBY_20] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_5] Reducer 5 llap - File Output Operator [FS_25] - table:{"name:":"default.dest2"} - Select Operator [SEL_23] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_22] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to 
the previous Union 3 [SIMPLE_EDGE] + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_18] + table:{"name:":"default.dest1"} + Select Operator [SEL_16] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_15] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_7] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_21] + PartitionCols:_col0, _col1 + Group By Operator [GBY_20] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_7] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_10] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_9] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_21] + PartitionCols:_col0, _col1 + Group By Operator [GBY_20] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_10] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_5] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_21] + PartitionCols:_col0, _col1 + Group By Operator [GBY_20] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_5] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_25] + table:{"name:":"default.dest2"} + Select 
Operator [SEL_23] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_22] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1476) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 16)","compute_stats(val1, 16)","compute_stats(val2, 16)"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_23] Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.dest2"} @@ -3497,72 +3690,94 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-4 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.dest1"} Stage-3 Dependency Collection{} Stage-2 - Reducer 4 llap - File Output Operator [FS_16] - table:{"name:":"default.dest1"} - Select Operator [SEL_14] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_13] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_12] - PartitionCols:_col0 - Group By Operator [GBY_11] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_7] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_19] - PartitionCols:_col0, _col1 - Group By Operator [GBY_18] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_7] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_12] - PartitionCols:_col0 - Group By Operator [GBY_11] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_5] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_19] - PartitionCols:_col0, _col1 - Group By Operator [GBY_18] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_5] Reducer 5 llap - File Output Operator [FS_23] - table:{"name:":"default.dest2"} - Select Operator [SEL_21] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_20] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT 
KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_16] + table:{"name:":"default.dest1"} + Select Operator [SEL_14] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_13] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_7] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_19] + PartitionCols:_col0, _col1 + Group By Operator [GBY_18] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_7] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_5] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_19] + PartitionCols:_col0, _col1 + Group By Operator [GBY_18] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_5] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_14] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_23] + table:{"name:":"default.dest2"} + Select Operator [SEL_21] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_20] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1476) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 16)","compute_stats(val1, 16)","compute_stats(val2, 16)"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select 
Operator [SEL_21] Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.dest2"} diff --git ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out index 6b6ba4db18..c14bc4ffb9 100644 --- ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out +++ ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out @@ -197,7 +197,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt,zip @@ -332,7 +332,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt,zip @@ -853,7 +853,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -899,7 +899,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -945,7 +945,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -991,7 +991,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1083,7 +1083,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1129,7 +1129,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1175,7 +1175,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1221,7 +1221,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1313,7 +1313,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt diff --git ql/src/test/results/clientpositive/llap/groupby1.q.out ql/src/test/results/clientpositive/llap/groupby1.q.out index 5917013fa4..d58a9fd69f 100644 --- ql/src/test/results/clientpositive/llap/groupby1.q.out +++ ql/src/test/results/clientpositive/llap/groupby1.q.out @@ -95,7 +95,8 @@ STAGE PLANS: name: default.dest_g1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/groupby2.q.out ql/src/test/results/clientpositive/llap/groupby2.q.out index f94db4edf1..c45271997e 100644 --- ql/src/test/results/clientpositive/llap/groupby2.q.out +++ ql/src/test/results/clientpositive/llap/groupby2.q.out @@ -27,6 +27,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -81,6 +83,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 309 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 309 Data size: 114948 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -96,7 +134,12 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY 
substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/llap/groupby3.q.out ql/src/test/results/clientpositive/llap/groupby3.q.out index 3495de6d3f..d050c4ec69 100644 --- ql/src/test/results/clientpositive/llap/groupby3.q.out +++ ql/src/test/results/clientpositive/llap/groupby3.q.out @@ -45,6 +45,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +74,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reducer 3 @@ -95,6 +97,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -110,7 +140,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, 
double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out index 1efb81b35f..35b68914b5 100644 --- ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out +++ ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out @@ -42,7 +42,7 @@ Database: default Table: test_table_bucketed #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 309 rawDataSize 1482 diff --git ql/src/test/results/clientpositive/llap/insert1.q.out ql/src/test/results/clientpositive/llap/insert1.q.out index af4b8871e1..73d4f3ce32 100644 --- ql/src/test/results/clientpositive/llap/insert1.q.out +++ ql/src/test/results/clientpositive/llap/insert1.q.out @@ -38,6 +38,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -59,8 +62,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -76,7 +107,12 @@ STAGE PLANS: name: default.insert1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 PREHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY @@ -92,6 +128,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -113,8 +152,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 
(type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -130,7 +197,12 @@ STAGE PLANS: name: default.insert1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 PREHOOK: query: create database x PREHOOK: type: CREATEDATABASE @@ -160,6 +232,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -181,8 +256,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -198,7 +301,12 @@ STAGE PLANS: name: x.insert1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 PREHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY @@ 
-214,6 +322,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -235,8 +346,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -252,7 +391,12 @@ STAGE PLANS: name: default.insert1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 PREHOOK: query: explain from insert2 @@ -276,6 +420,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -297,6 +445,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE @@ -312,8 +473,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1052 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1068 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -329,7 +533,12 @@ STAGE PLANS: name: default.insert1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 Stage: Stage-1 Move Operator @@ -342,7 +551,12 @@ STAGE PLANS: name: x.insert1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 PREHOOK: query: CREATE DATABASE db2 PREHOOK: type: CREATEDATABASE diff --git ql/src/test/results/clientpositive/llap/insert_into1.q.out ql/src/test/results/clientpositive/llap/insert_into1.q.out index df72ca120d..f8609d441a 100644 --- ql/src/test/results/clientpositive/llap/insert_into1.q.out +++ ql/src/test/results/clientpositive/llap/insert_into1.q.out @@ -26,6 +26,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -41,7 +42,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -67,6 +67,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data 
size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -82,7 +110,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ -148,6 +181,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -163,7 +197,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -189,6 +222,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -204,7 +265,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ 
-270,6 +336,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -285,7 +352,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -311,6 +377,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -326,7 +420,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY @@ -390,6 +489,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -409,8 +511,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -426,7 +556,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: insert overwrite table insert_into1 select 1, 'a' PREHOOK: type: QUERY @@ -452,6 +587,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -471,8 +609,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -488,7 +654,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: insert into insert_into1 select 2, 'b' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/insert_into2.q.out ql/src/test/results/clientpositive/llap/insert_into2.q.out index a42c651d63..83f0e3dedc 100644 --- ql/src/test/results/clientpositive/llap/insert_into2.q.out +++ ql/src/test/results/clientpositive/llap/insert_into2.q.out @@ -30,6 +30,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -45,7 +46,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: 
no inputs @@ -71,6 +71,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -88,7 +124,12 @@ STAGE PLANS: name: default.insert_into2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY @@ -193,6 +234,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -208,7 +250,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -234,6 +275,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num 
rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -251,7 +328,12 @@ STAGE PLANS: name: default.insert_into2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 @@ -325,6 +407,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -340,7 +423,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -366,6 +448,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 9000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 949 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 965 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -383,7 +501,12 @@ STAGE PLANS: name: default.insert_into2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 diff --git ql/src/test/results/clientpositive/llap/join1.q.out ql/src/test/results/clientpositive/llap/join1.q.out index 724481beef..3c0a155cd8 100644 --- ql/src/test/results/clientpositive/llap/join1.q.out +++ ql/src/test/results/clientpositive/llap/join1.q.out @@ -25,7 +25,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -47,7 +48,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -90,6 +91,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -105,7 +134,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/llap/lineage3.q.out ql/src/test/results/clientpositive/llap/lineage3.q.out index 2c53bec759..6a613a6205 100644 --- ql/src/test/results/clientpositive/llap/lineage3.q.out +++ ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -10,7 +10,7 @@ insert into table d1 select x + length(y) PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: 
default@d1 -{"version":"1.0","engine":"tez","database":"default","hash":"4c9b7b8d89403cef78668f15d393e542","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"4c9b7b8d89403cef78668f15d393e542","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[1,2],"targets":[0],"expression":"compute_stats((UDFToInteger(a.ctinyint) + length(b.cstring1)), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: drop table if exists d2 PREHOOK: type: DROPTABLE PREHOOK: query: create table d2(b varchar(128)) @@ -25,7 +25,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 PREHOOK: Output: default@d2 -{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 
0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(x), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: drop table if exists t PREHOOK: type: DROPTABLE PREHOOK: query: create table t as @@ -51,7 +51,7 @@ where cint is not null and cint < 0 order by cint, cs limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=today -{"version":"1.0","engine":"tez","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1,2],"expression":"(alltypesorc.cint < 
0)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(default.alltypesorc.cint, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( alltypesorc.cstring1 AS varchar(128)), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'today'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: insert into table dest_l1 partition (ds='tomorrow') select min(cint), cast(min(cstring1) as varchar(128)) as cs from alltypesorc @@ -61,7 +61,7 @@ having min(cbigint) > 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=tomorrow -{"version":"1.0","engine":"tez","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[2],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"(min(default.alltypesorc.cbigint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[3],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[5,3],"targets":[0,1,2],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2],"expression":"(min(default.alltypesorc.cbigint) > 10)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(min(default.alltypesorc.cint), 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( min(default.alltypesorc.cstring1) AS varchar(128)), 
16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'tomorrow'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: select cint, rank() over(order by cint) from alltypesorc where cint > 10 and cint < 10000 limit 10 PREHOOK: type: QUERY @@ -348,21 +348,23 @@ PREHOOK: query: insert into dest_dp1 partition (year) select first, word, year f PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp1 -{"version":"1.0","engine":"tez","database":"default","hash":"b2d38401a3281e74a003d9650df97060","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"b2d38401a3281e74a003d9650df97060","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[6],"targets":[4],"expression":"compute_stats(default.src_dp.first, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[5],"expression":"compute_stats(default.src_dp.word, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"first"},{"id":5,"vertexType":"COLUMN","vertexId":"word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp2 -{"version":"1.0","engine":"tez","database":"default","hash":"237302d8ffd62b5b71d9544b22de7770","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from 
src_dp","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"237302d8ffd62b5b71d9544b22de7770","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[11],"targets":[4,5],"edgeType":"PROJECTION"},{"sources":[8],"targets":[6],"expression":"compute_stats(default.src_dp.first, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[7],"expression":"compute_stats(default.src_dp.word, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":5,"vertexType":"COLUMN","vertexId":"month"},{"id":6,"vertexType":"COLUMN","vertexId":"first"},{"id":7,"vertexType":"COLUMN","vertexId":"word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":9,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":10,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} PREHOOK: query: insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0 PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp2@y=0 +Result schema has 3 fields, but we don't get as many dependencies {"version":"1.0","engine":"tez","database":"default","hash":"63e990b47e7ab4eb6f2ea09dfb7453ff","queryText":"insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[6],"targets":[0,1,2],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0 PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp3@y=0 +Result schema 
has 4 fields, but we don't get as many dependencies {"version":"1.0","engine":"tez","database":"default","hash":"6bf71a9d02c0612c63b6f40b15c1e8b3","queryText":"insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2,3],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.day"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: drop table if exists src_dp1 PREHOOK: type: DROPTABLE @@ -385,4 +387,4 @@ PREHOOK: Output: default@dest_dp1@year=0 PREHOOK: Output: default@dest_dp2 PREHOOK: Output: default@dest_dp2@y=1 PREHOOK: Output: default@dest_dp3@y=2 -{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 
2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"},{"sources":[11],"targets":[0],"expression":"compute_stats(default.src_dp.first, 16)","edgeType":"PROJECTION"},{"sources":[12],"targets":[3],"expression":"compute_stats(default.src_dp.word, 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} diff --git ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out index cdb688b139..730b95b1dc 100644 --- ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out +++ ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out @@ -32,6 +32,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -73,6 +76,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 179000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -126,6 +148,42 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1060 
Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -159,8 +217,14 @@ STAGE PLANS: name: default.list_bucketing_static_part Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src @@ -204,7 +268,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git ql/src/test/results/clientpositive/llap/llap_smb.q.out ql/src/test/results/clientpositive/llap/llap_smb.q.out index c3047da75b..44d78f5e59 100644 --- ql/src/test/results/clientpositive/llap/llap_smb.q.out +++ ql/src/test/results/clientpositive/llap/llap_smb.q.out @@ -215,18 +215,18 @@ STAGE PLANS: TableScan alias: b filterExpr: id is not null (type: boolean) - Statistics: Num rows: 200 Data size: 1528 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: id is not null (type: boolean) - Statistics: Num rows: 190 Data size: 1451 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a filterExpr: id is not null (type: boolean) - Statistics: Num rows: 5000 Data size: 158008 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: id is not null (type: boolean) - Statistics: Num rows: 4750 Data size: 74104 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 5000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -234,18 +234,18 @@ STAGE PLANS: 0 id (type: bigint) 1 id (type: bigint) outputColumnNames: _col2, _col3 - Statistics: Num rows: 5225 Data size: 81514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 988 Data size: 7904 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col2 (type: int), _col3 (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5225 Data size: 81514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 
Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: smallint) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: smallint) - Statistics: Num rows: 5225 Data size: 81514 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap Reducer 2 @@ -256,10 +256,10 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2612 Data size: 40749 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2612 Data size: 40749 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out index 6ad9af8086..17beee16a9 100644 --- ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out @@ -64,6 +64,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -85,6 +89,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE @@ -100,8 +120,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 
1158 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1158 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -120,7 +196,12 @@ STAGE PLANS: name: default.nzhang_part1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 Stage: Stage-1 Move Operator @@ -136,7 +217,12 @@ STAGE PLANS: name: default.nzhang_part2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out index ce5517a54d..984a0f4729 100644 --- ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out +++ ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out @@ -46,6 +46,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -79,6 +80,42 
@@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 912000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 2316 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -97,7 +134,12 @@ STAGE PLANS: name: default.nzhang_part_bucket Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out index b34975fedc..514d6f9fc6 100644 --- ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out +++ ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out @@ -50,6 +50,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -68,8 +71,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: 
string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 4992 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -88,7 +127,12 @@ STAGE PLANS: name: default.nzhang_part3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part3 PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out index 9c4e8a891b..cbf1a8dbed 100644 --- ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out +++ ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out @@ -37,6 +37,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -55,8 +58,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 132750 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -74,7 +113,12 @@ STAGE PLANS: name: default.nzhang_part5 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part5 PREHOOK: query: insert overwrite table nzhang_part5 partition (value) select key, value from src PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/mapreduce1.q.out ql/src/test/results/clientpositive/llap/mapreduce1.q.out index 37f92d9f20..13148de3a4 100644 --- ql/src/test/results/clientpositive/llap/mapreduce1.q.out +++ ql/src/test/results/clientpositive/llap/mapreduce1.q.out @@ -34,6 +34,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +74,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -88,7 +117,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git 
ql/src/test/results/clientpositive/llap/mapreduce2.q.out ql/src/test/results/clientpositive/llap/mapreduce2.q.out index 71bbb7e612..6e351a210f 100644 --- ql/src/test/results/clientpositive/llap/mapreduce2.q.out +++ ql/src/test/results/clientpositive/llap/mapreduce2.q.out @@ -32,6 +32,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -70,6 +71,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -85,7 +114,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/llap/multi_insert.q.out ql/src/test/results/clientpositive/llap/multi_insert.q.out index 58fc759f26..813ae56670 100644 --- ql/src/test/results/clientpositive/llap/multi_insert.q.out +++ ql/src/test/results/clientpositive/llap/multi_insert.q.out @@ -36,6 +36,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -57,6 +61,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 
Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -72,8 +89,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -89,7 +149,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -102,7 +167,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -179,6 +249,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -200,6 +274,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + 
Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -215,8 +302,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -232,7 +362,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -245,7 +380,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -322,6 +462,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + 
Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -343,6 +487,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -358,8 +515,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -375,7 +575,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -388,7 +593,12 @@ STAGE PLANS: name: 
default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -465,6 +675,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -486,6 +700,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -501,8 +728,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -518,7 +788,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -531,7 +806,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -610,6 +890,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -648,6 +930,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -664,6 +959,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic 
stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -679,7 +1017,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -692,7 +1035,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -764,6 +1112,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -802,6 +1152,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -818,6 +1181,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -833,7 +1239,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -846,7 +1257,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -918,6 +1334,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -956,6 +1374,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -972,6 +1403,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: 
mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -987,7 +1461,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1000,7 +1479,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -1072,6 +1556,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1110,6 +1596,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1126,6 +1625,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1141,7 +1683,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1154,7 +1701,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -1226,7 +1778,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1249,6 +1803,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1260,9 +1827,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + 
Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1282,6 +1862,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1293,8 +1886,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1312,7 +1948,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1325,7 +1966,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1423,7 +2069,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1446,6 +2094,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1457,9 +2118,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1479,6 +2153,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1490,8 +2177,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1509,7 +2239,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1522,7 +2257,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1620,7 +2360,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1643,6 +2385,19 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1654,9 +2409,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1676,6 +2444,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1687,8 +2468,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: 
struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1706,7 +2530,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1719,7 +2548,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1817,7 +2651,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1840,6 +2676,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1851,9 +2700,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data 
size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1873,6 +2735,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1884,8 +2759,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1903,7 +2821,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1916,7 +2839,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 diff --git ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out index 6d20939009..8d07b4321f 100644 --- ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out +++ ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out @@ -56,6 +56,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -83,6 +87,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 @@ -105,6 +122,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Lateral View Forward Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -126,6 +156,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 @@ -148,8 +191,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -165,7 +251,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -178,7 +269,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C @@ -278,7 +374,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -395,10 +493,38 @@ STAGE PLANS: 
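(Aside: the recurring Select Operator / Group By Operator / Reduce Output Operator branches added throughout these plans are the column-statistics pipeline that hive.stats.column.autogather attaches to every INSERT target. As a rough sketch, using src_lv1 and its key/value columns from this test file, the injected branch computes roughly what this hand-written aggregation would:

    -- Sketch only: the aggregation the autogather branch performs for one
    -- INSERT target. compute_stats is Hive's column-stats UDAF; the 'hll'
    -- argument selects the HyperLogLog estimator for distinct-value counts
    -- (older golden files pass a bit-vector width such as 16 instead).
    SET hive.stats.column.autogather=true;  -- enable automatic gathering

    SELECT compute_stats(key, 'hll'),
           compute_stats(value, 'hll')
    FROM src_lv1;

Map-side instances of this Group By run in hash mode to emit partial stats structs, and a dedicated reducer merges them in mergepartial mode, which is why each INSERT branch gains a CUSTOM_SIMPLE_EDGE to its own small stats reducer.)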
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -416,6 +542,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -431,7 +585,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -444,7 +603,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(C) lateral view 
explode(array(key+1, key+2)) A as C group by key @@ -528,7 +692,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -611,9 +778,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: Forward Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -637,6 +832,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE @@ -658,6 +866,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -673,7 +924,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -686,7 +942,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 Stage: Stage-2 Move Operator @@ -699,7 +960,12 @@ STAGE PLANS: name: default.src_lv3 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key @@ -797,8 +1063,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -926,10 +1195,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: Group 
By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: double) mode: mergepartial @@ -947,7 +1244,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -968,6 +1293,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -983,7 +1336,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -996,7 +1354,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 Stage: Stage-2 Move Operator @@ -1009,7 +1372,12 @@ STAGE PLANS: name: default.src_lv3 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 PREHOOK: query: from src_10 insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C @@ -1149,8 +1517,12 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1271,10 +1643,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -1292,7 +1692,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') 
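(Aside: each of these per-target stats branches feeds a Stats Work stage, which replaces the old Stats-Aggr Operator and persists the merged compute_stats results to the metastore. The effect shows up in the table or partition parameters, as in the orc_analyze output further below; a hedged sketch of inspecting it:

    -- Sketch only: verifying that the INSERT left accurate column stats.
    DESCRIBE FORMATTED src_lv1;
    -- expected among the Table Parameters (abbreviated):
    --   COLUMN_STATS_ACCURATE  {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}

    -- The same stats can also be produced by an explicit:
    ANALYZE TABLE src_lv1 COMPUTE STATISTICS FOR COLUMNS;

For partitioned inserts, the injected Group By additionally keys on the partition columns, so stats are merged and recorded per partition rather than once per table.)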
+ mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1316 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Forward @@ -1318,6 +1746,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE @@ -1339,6 +1780,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1248 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -1354,7 +1838,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -1367,7 +1856,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 Stage: Stage-2 Move Operator @@ -1380,7 +1874,12 @@ STAGE PLANS: name: default.src_lv3 Stage: Stage-8 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 Stage: Stage-3 Move Operator @@ -1393,7 +1892,12 @@ STAGE PLANS: name: default.src_lv4 Stage: Stage-9 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv4 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key diff --git ql/src/test/results/clientpositive/llap/orc_analyze.q.out ql/src/test/results/clientpositive/llap/orc_analyze.q.out index 1cc9d61b48..1c6996463a 100644 --- ql/src/test/results/clientpositive/llap/orc_analyze.q.out +++ ql/src/test/results/clientpositive/llap/orc_analyze.q.out @@ -281,7 +281,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 52600 @@ -728,7 +728,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 1 numRows 50 rawDataSize 21950 @@ -771,7 +771,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 1 numRows 50 rawDataSize 22050 @@ -1230,7 +1230,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 4 numRows 50 rawDataSize 21975 @@ -1273,7 
+1273,7 @@ Database: default Table: orc_create_people #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}} numFiles 4 numRows 50 rawDataSize 22043 diff --git ql/src/test/results/clientpositive/llap/orc_merge1.q.out ql/src/test/results/clientpositive/llap/orc_merge1.q.out index ba29491001..64b846043d 100644 --- ql/src/test/results/clientpositive/llap/orc_merge1.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge1.q.out @@ -60,6 +60,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -78,8 +81,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -98,7 +137,12 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -144,6 +188,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator 
Tree: @@ -162,8 +209,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -191,7 +274,12 @@ STAGE PLANS: name: default.orcfile_merge1b Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-4 Tez @@ -273,6 +361,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -291,8 +382,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE 
Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -320,7 +447,12 @@ STAGE PLANS: name: default.orcfile_merge1c Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-4 Tez diff --git ql/src/test/results/clientpositive/llap/orc_merge10.q.out ql/src/test/results/clientpositive/llap/orc_merge10.q.out index dd5d1cbbe7..77b4325a0a 100644 --- ql/src/test/results/clientpositive/llap/orc_merge10.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge10.q.out @@ -60,6 +60,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -78,8 +81,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 
287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -98,7 +137,12 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -144,6 +188,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -162,8 +209,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -191,7 +274,12 @@ STAGE PLANS: name: default.orcfile_merge1b Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-4 Tez @@ -273,6 +361,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map 
Operator Tree: @@ -291,8 +382,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -320,7 +447,12 @@ STAGE PLANS: name: default.orcfile_merge1c Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-4 Tez @@ -459,7 +591,8 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE PREHOOK: type: ALTER_PARTITION_MERGE diff --git ql/src/test/results/clientpositive/llap/orc_merge2.q.out ql/src/test/results/clientpositive/llap/orc_merge2.q.out index c38852a95b..658c78901c 100644 --- ql/src/test/results/clientpositive/llap/orc_merge2.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge2.q.out @@ -34,6 +34,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -52,8 +55,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 274000 
Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -73,7 +112,12 @@ STAGE PLANS: name: default.orcfile_merge2a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three) SELECT key, value, PMOD(HASH(key), 10) as two, diff --git ql/src/test/results/clientpositive/llap/orc_merge5.q.out ql/src/test/results/clientpositive/llap/orc_merge5.q.out index 8be0f55cfc..dd89460bf5 100644 --- ql/src/test/results/clientpositive/llap/orc_merge5.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge5.q.out @@ -36,6 +36,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -58,8 +61,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 102554 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort 
order: + Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,7 +106,12 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY @@ -130,6 +166,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -152,8 +191,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 102554 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -178,7 
+245,12 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b Stage: Stage-4 Tez @@ -299,7 +371,8 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5b concatenate PREHOOK: type: ALTER_TABLE_MERGE diff --git ql/src/test/results/clientpositive/llap/orc_merge6.q.out ql/src/test/results/clientpositive/llap/orc_merge6.q.out index afba086cde..83f62cf1ec 100644 --- ql/src/test/results/clientpositive/llap/orc_merge6.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge6.q.out @@ -36,6 +36,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -58,8 +61,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 102554 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 102554 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 102554 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 51277 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 51277 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 51277 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ 
-78,7 +117,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY @@ -175,6 +219,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -197,8 +244,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 102554 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 102554 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 102554 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 51277 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 51277 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 51277 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -226,7 +309,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a Stage: Stage-4 Tez @@ -434,7 +522,8 
@@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5a partition(year="2000",hour=24) concatenate PREHOOK: type: ALTER_PARTITION_MERGE diff --git ql/src/test/results/clientpositive/llap/orc_merge7.q.out ql/src/test/results/clientpositive/llap/orc_merge7.q.out index 58e37c28d0..de39597482 100644 --- ql/src/test/results/clientpositive/llap/orc_merge7.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge7.q.out @@ -36,6 +36,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -54,8 +57,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 308000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 308000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 308000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 153832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 153832 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 153832 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -73,7 +112,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 PREHOOK: type: QUERY @@ 
-209,6 +253,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -227,8 +274,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 308000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 308000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 308000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 153832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 153832 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 153832 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -255,7 +338,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a Stage: Stage-4 Tez @@ -540,7 +628,8 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate PREHOOK: type: ALTER_PARTITION_MERGE diff --git ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out index ba29491001..64b846043d 100644 --- ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out @@ -60,6 +60,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + 
Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -78,8 +81,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -98,7 +137,12 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -144,6 +188,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -162,8 +209,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 
(type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -191,7 +274,12 @@ STAGE PLANS: name: default.orcfile_merge1b Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-4 Tez @@ -273,6 +361,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -291,8 +382,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + 
compressed: false + Statistics: Num rows: 250 Data size: 287250 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -320,7 +447,12 @@ STAGE PLANS: name: default.orcfile_merge1c Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-4 Tez diff --git ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out index f3d34cce05..d500c7cc1d 100644 --- ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out @@ -36,6 +36,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -57,8 +60,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 102554 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -74,7 +105,12 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out 
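The operator block repeated across these golden files is the column-statistics autogather pipeline: a Select Operator re-labels the insert's output columns under their target names, a hash-mode Group By Operator computes compute_stats(col, 'hll') partial sketches ('hll' selects HyperLogLog for distinct-value estimation), and a single reducer merges them with mode: mergepartial into the staging file that the new Stats Work stage persists. A minimal HiveQL sketch of the knob and its manual equivalent — t and s are hypothetical tables, not part of this patch:

    -- new default: INSERT plans carry the compute_stats branch as a side task
    SET hive.stats.column.autogather=true;
    SET hive.stats.autogather=true;        -- basic stats (row count, raw size)
    INSERT OVERWRITE TABLE t SELECT a, b FROM s;

    -- equivalent manual collection with autogather off
    SET hive.stats.column.autogather=false;
    INSERT OVERWRITE TABLE t SELECT a, b FROM s;
    ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS;
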
ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out index 1419f90553..60e3e4e2b5 100644 --- ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out +++ ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out @@ -36,6 +36,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -54,8 +57,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 308000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 308000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 308000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 153832 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 153832 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 153832 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -73,7 +112,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY @@ -286,7 +330,8 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate PREHOOK: type: ALTER_PARTITION_MERGE diff --git 
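The parallel_colstats.q.out hunk that follows shows the structural half of the change: the formerly separate Column Stats Work stages (old Stage-6/Stage-7) disappear, and each insert target instead gets one unified Stats Work stage combining Basic Stats Work with a Column Stats Desc. The multi-insert under test, restated with EXPLAIN to show what to look for (query text as in the golden file; the stage numbers are those visible in the hunk):

    EXPLAIN
    FROM (SELECT key, value FROM src GROUP BY key, value) s
    INSERT OVERWRITE TABLE src_a SELECT s.key, s.value GROUP BY s.key, s.value
    INSERT OVERWRITE TABLE src_b SELECT s.key, s.value GROUP BY s.key, s.value;
    -- before: Stage-4/Stage-5 (Stats-Aggr Operator) plus Stage-6/Stage-7 (Column Stats Work)
    -- after:  Stage-4 and Stage-5 each become a Stats Work stage carrying both
    --         Basic Stats Work and a Column Stats Desc for key, value
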
ql/src/test/results/clientpositive/llap/parallel_colstats.q.out ql/src/test/results/clientpositive/llap/parallel_colstats.q.out index e89bf2f177..4ac3fbb2f5 100644 --- ql/src/test/results/clientpositive/llap/parallel_colstats.q.out +++ ql/src/test/results/clientpositive/llap/parallel_colstats.q.out @@ -29,8 +29,6 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-6 depends on stages: Stage-4, Stage-5 - Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -181,22 +179,13 @@ STAGE PLANS: name: default.src_a Stage: Stage-4 - Stats-Aggr Operator - - Stage: Stage-6 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key, value Column Types: string, string Table: default.src_a - Stage: Stage-7 - Column Stats Work - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.src_b - Stage: Stage-1 Move Operator tables: @@ -208,7 +197,12 @@ STAGE PLANS: name: default.src_b Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b PREHOOK: query: from (select key, value from src group by key, value) s insert overwrite table src_a select s.key, s.value group by s.key, s.value diff --git ql/src/test/results/clientpositive/llap/partition_pruning.q.out ql/src/test/results/clientpositive/llap/partition_pruning.q.out index c525ee7bd6..6060bf7fae 100644 --- ql/src/test/results/clientpositive/llap/partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/partition_pruning.q.out @@ -106,7 +106,7 @@ STAGE PLANS: partition values: dt 2001-01-01 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns customer @@ -175,7 +175,7 @@ STAGE PLANS: partition values: dt 2001-01-01 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns customer @@ -220,7 +220,7 @@ STAGE PLANS: partition values: dt 2001-01-03 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns customer @@ -289,7 +289,7 @@ STAGE PLANS: partition values: dt 2001-01-01 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns customer @@ -334,7 +334,7 @@ STAGE PLANS: partition values: dt 2001-01-03 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"customer":"true"}} bucket_count -1 column.name.delimiter , columns customer diff --git ql/src/test/results/clientpositive/llap/ptf.q.out ql/src/test/results/clientpositive/llap/ptf.q.out index c69ed1ddc0..8836a424ff 100644 --- ql/src/test/results/clientpositive/llap/ptf.q.out +++ ql/src/test/results/clientpositive/llap/ptf.q.out @@ -2966,8 +2966,10 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 
(SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3069,9 +3071,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double) + outputColumnNames: p_mfgr, p_name, p_size, r, dr, s + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 @@ -3106,7 +3136,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: sum_window_0 (type: bigint), _col5 (type: int) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3166,6 +3196,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int) + outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(s2, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(fv1, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + 
Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -3181,7 +3239,12 @@ STAGE PLANS: name: default.part_4 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, r, dr, s + Column Types: string, string, int, int, int, double + Table: default.part_4 Stage: Stage-1 Move Operator @@ -3194,7 +3257,12 @@ STAGE PLANS: name: default.part_5 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Column Types: string, string, int, int, int, int, double, int + Table: default.part_5 PREHOOK: query: from noop(on part partition by p_mfgr diff --git ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out index 2bb8e8ab1d..4ff5569a7c 100644 --- ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out +++ ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out @@ -34,6 +34,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -52,8 +55,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.rcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: 
no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 666500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -73,7 +112,12 @@ STAGE PLANS: name: default.rcfile_merge2a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge2a PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge2a PARTITION (one='1', two, three) SELECT key, value, PMOD(HASH(key), 10) as two, diff --git ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out index 40524f1515..967075c6ad 100644 --- ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out +++ ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out @@ -141,6 +141,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -175,8 +210,14 @@ STAGE PLANS: name: default.bucket5_1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: 
default.bucket5_1 + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket5_1 select * from src cluster by key @@ -268,6 +309,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -356,6 +398,61 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string), '2010-03-29' (type: string) + outputColumnNames: aid, bid, t, ctime, etime, l, et, ds + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(aid, 'hll'), compute_stats(bid, 'hll'), compute_stats(t, 'hll'), compute_stats(ctime, 'hll'), compute_stats(etime, 'hll'), compute_stats(l, 'hll'), compute_stats(et, 'hll') + keys: '2010-03-29' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3142 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: '2010-03-29' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '2010-03-29' (type: string) + Statistics: Num rows: 1 Data size: 3142 Basic stats: COMPLETE Column stats: PARTIAL + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + auto parallelism: true + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + keys: '2010-03-29' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), '2010-03-29' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 3174 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types struct:struct:struct:struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -388,6 +485,12 @@ STAGE PLANS: name: default.complex_tbl_1 Stage: 
Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: aid, bid, t, ctime, etime, l, et + Column Types: string, string, int, string, bigint, string, string + Table: default.complex_tbl_1 + Is Table Level Stats: false diff --git ql/src/test/results/clientpositive/llap/sample1.q.out ql/src/test/results/clientpositive/llap/sample1.q.out index 1a7fb3254e..3458ee28d0 100644 --- ql/src/test/results/clientpositive/llap/sample1.q.out +++ ql/src/test/results/clientpositive/llap/sample1.q.out @@ -26,6 +26,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -73,6 +76,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(dt, 'hll'), compute_stats(hr, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -128,6 +147,37 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -161,8 +211,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s diff --git 
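Once a Stats Work stage commits, the metastore records per-column accuracy inside COLUMN_STATS_ACCURATE — the property whose value grows a COLUMN_STATS map in the partition_pruning and stats11 hunks. To inspect the outcome interactively (standard HiveQL; default.dest1 and its key column are the table from the sample1 hunk just above, and the exact property value shown is an illustrative expectation, not output from this patch):

    -- column-level stats: min/max, num_nulls, distinct_count (HLL-estimated)
    DESCRIBE FORMATTED default.dest1 key;
    -- COLUMN_STATS_ACCURATE should now read something like
    -- {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true","dt":"true","hr":"true"}}
    SHOW TBLPROPERTIES dest1;
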
ql/src/test/results/clientpositive/llap/skewjoin.q.out ql/src/test/results/clientpositive/llap/skewjoin.q.out index 5a87f93e41..a33db4f5fe 100644 --- ql/src/test/results/clientpositive/llap/skewjoin.q.out +++ ql/src/test/results/clientpositive/llap/skewjoin.q.out @@ -89,7 +89,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -111,7 +112,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -154,6 +155,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 809 Data size: 76855 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -169,7 +198,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/llap/sqlmerge.q.out ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 61349f8e5b..c0217281c6 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -41,11 +41,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Reducer 2 (SIMPLE_EDGE) Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -61,7 +62,7 @@ STAGE PLANS: value expressions: ROW__ID (type: struct) Execution mode: llap LLAP IO: may be used (ACID table) - Map 7 + Map 8 Map Operator Tree: TableScan alias: s @@ -197,6 +198,26 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_tmp_table + Select Operator + expressions: _col0 (type: int) + outputColumnNames: val + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -213,6 +234,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-5 Dependency Collection @@ -228,7 +277,8 @@ STAGE PLANS: name: default.acidtbl Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-2 Move Operator @@ -241,7 +291,8 @@ STAGE PLANS: name: default.acidtbl Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Move Operator @@ -254,7 +305,12 @@ STAGE PLANS: name: default.merge_tmp_table Stage: Stage-8 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table Stage: Stage-1 Move Operator @@ -267,7 +323,12 @@ STAGE PLANS: name: default.acidtbl Stage: Stage-9 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl PREHOOK: query: explain merge into acidTbl as t using nonAcidOrcTbl s ON t.a = s.a WHEN NOT MATCHED THEN INSERT VALUES(s.a, s.b) @@ -286,8 +347,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- 
Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -302,7 +364,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: s @@ -354,6 +416,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -369,5 +459,10 @@ STAGE PLANS: name: default.acidtbl Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl diff --git ql/src/test/results/clientpositive/llap/stats11.q.out ql/src/test/results/clientpositive/llap/stats11.q.out index 387a454cb9..4ab80f2fb4 100644 --- ql/src/test/results/clientpositive/llap/stats11.q.out +++ ql/src/test/results/clientpositive/llap/stats11.q.out @@ -54,7 +54,8 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD @@ -313,7 +314,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -392,7 +394,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -519,6 +521,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data 
size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -552,8 +601,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -679,7 +734,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -758,7 +814,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -864,7 +920,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -885,6 +941,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 58 Data size: 16921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + 
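Every hunk in this stretch of the patch repeats one template introduced by column-stats autogathering: a map-side Select plus Group By computes compute_stats(col, 'hll') in hash mode, an empty sort order ships the partial sketches over a CUSTOM_SIMPLE_EDGE to a dedicated stats reducer that merges them (mode: mergepartial), and the former Stats-Aggr Operator stage is replaced by a Stats Work stage carrying an explicit Column Stats Desc. A minimal sketch of how to observe that plan shape, assuming a build with this patch applied; the table stats_demo and the exact query are illustrative, only src is a real test table:

  -- stats_demo is a hypothetical two-column table, used only for illustration
  set hive.stats.column.autogather=true;
  create table stats_demo (key string, value string);

  -- EXPLAIN should show the extra Select/Group By (compute_stats ... mode: hash)
  -- on the map side, a stats reducer (mode: mergepartial), and a final
  -- "Stats Work" stage with "Column Stats Desc: Columns: key, value"
  explain insert overwrite table stats_demo select key, value from src;
  insert overwrite table stats_demo select key, value from src;

  -- the auto-gathered column statistics are then visible directly:
  describe formatted stats_demo key;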
Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -898,7 +1001,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -918,8 +1021,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/llap/stats_noscan_1.q.out ql/src/test/results/clientpositive/llap/stats_noscan_1.q.out index d6eacf675a..9315d6bdde 100644 --- ql/src/test/results/clientpositive/llap/stats_noscan_1.q.out +++ ql/src/test/results/clientpositive/llap/stats_noscan_1.q.out @@ -44,7 +44,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics noscan PREHOOK: type: QUERY @@ -315,7 +316,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart_partial PARTITION(ds='2008-04-08') compute statistics noscan PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/stats_only_null.q.out ql/src/test/results/clientpositive/llap/stats_only_null.q.out index f25f9d5e8f..8b93d5f2da 100644 --- ql/src/test/results/clientpositive/llap/stats_only_null.q.out +++ ql/src/test/results/clientpositive/llap/stats_only_null.q.out @@ -73,56 +73,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was 
here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -133,56 +89,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 2120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 2120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git ql/src/test/results/clientpositive/llap/temp_table.q.out ql/src/test/results/clientpositive/llap/temp_table.q.out index 462edae4fe..4cf47b5bd9 100644 --- ql/src/test/results/clientpositive/llap/temp_table.q.out +++ ql/src/test/results/clientpositive/llap/temp_table.q.out @@ -52,7 +52,8 @@ STAGE PLANS: isTemporary: true Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator @@ -124,7 +125,8 @@ STAGE PLANS: isTemporary: true Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/llap/tez_dml.q.out ql/src/test/results/clientpositive/llap/tez_dml.q.out index 84c65a7e66..89b74cf569 100644 --- ql/src/test/results/clientpositive/llap/tez_dml.q.out +++ ql/src/test/results/clientpositive/llap/tez_dml.q.out @@ -84,7 +84,8 @@ STAGE PLANS: name: default.tmp_src Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator @@ -443,6 +444,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -461,8 +465,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmp_src_part + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: c, d + Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c, 'hll') + keys: d (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 309 Data size: 56640 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 154 Data size: 28228 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -480,7 +520,12 @@ STAGE PLANS: name: default.tmp_src_part Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c + Column Types: string + Table: default.tmp_src_part PREHOOK: query: INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * FROM tmp_src PREHOOK: type: 
QUERY @@ -864,6 +909,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -885,6 +934,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.even + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c, d + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c, 'hll'), compute_stats(d, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key % 2) = 1) (type: boolean) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE @@ -900,8 +962,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.odd + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c, d + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c, 'hll'), compute_stats(d, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -917,7 +1022,12 @@ STAGE PLANS: name: default.even Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c, d + Column Types: int, string + Table: default.even Stage: Stage-1 Move Operator @@ -930,7 
+1040,12 @@ STAGE PLANS: name: default.odd Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c, d + Column Types: int, string + Table: default.odd PREHOOK: query: FROM src INSERT INTO TABLE even SELECT key, value WHERE key % 2 = 0 diff --git ql/src/test/results/clientpositive/llap/tez_fsstat.q.out ql/src/test/results/clientpositive/llap/tez_fsstat.q.out index 133f50c8db..802140cfd2 100644 --- ql/src/test/results/clientpositive/llap/tez_fsstat.q.out +++ ql/src/test/results/clientpositive/llap/tez_fsstat.q.out @@ -83,7 +83,7 @@ Database: default Table: tab_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/llap/tez_join_result_complex.q.out ql/src/test/results/clientpositive/llap/tez_join_result_complex.q.out index 3e434751b4..c3ec623073 100644 --- ql/src/test/results/clientpositive/llap/tez_join_result_complex.q.out +++ ql/src/test/results/clientpositive/llap/tez_join_result_complex.q.out @@ -369,7 +369,8 @@ STAGE PLANS: name: default.ct_events1_test Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### Stage: Stage-0 @@ -1353,7 +1354,8 @@ STAGE PLANS: name: default.ct_events1_test Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out index 2ca78d7af8..94f5cb4a43 100644 --- ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out +++ ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out @@ -54,7 +54,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 4 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -78,9 +79,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.partunion1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: id1, part1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(id1, 'hll') + keys: part1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: dummy @@ -101,8 +118,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.partunion1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: id1, part1 + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + 
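In the dynamic-partition hunks (tez_dml.q.out above and tez_union_dynamic_partition.q.out here), the added Group By is keyed on the partition column, so the stats reducer merges compute_stats per partition, and a trailing Select reorders its output to (stats struct, partition key) for the metastore writer. A sketch under the same assumptions as above; partunion1 (id1 int, partitioned by part1 string) is the table from this test, but the query itself is simplified:

  set hive.stats.column.autogather=true;
  set hive.exec.dynamic.partition.mode=nonstrict;

  -- the stats Group By carries "keys: part1", so one compute_stats row is
  -- produced per dynamic partition rather than a single table-level row
  explain insert into table partunion1 partition(part1)
  select 1 as id1, '2014' as part1 from src limit 1;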
aggregations: compute_stats(id1, 'hll') + keys: part1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -122,7 +175,12 @@ STAGE PLANS: name: default.partunion1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: id1 + Column Types: int + Table: default.partunion1 PREHOOK: query: insert into table partunion1 partition(part1) select temps.* from ( diff --git ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out index d399c5e4f5..16f10e7495 100644 --- ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out +++ ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out @@ -53,11 +53,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) - Map 7 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) + Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 5 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Union 3 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -78,7 +80,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -111,7 +113,7 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 9 Map Operator Tree: TableScan alias: s0 @@ -199,10 +201,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 
Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -220,6 +250,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -237,7 +295,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -250,7 +313,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 PREHOOK: query: FROM ( select key, value from ( @@ -961,10 +1029,12 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 7 <- Union 2 (CONTAINS) + Map 9 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 4 <- Union 2 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Union 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) #### A 
masked pattern was here #### Vertices: Map 1 @@ -1000,7 +1070,7 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: s1 @@ -1018,7 +1088,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 9 Map Operator Tree: TableScan alias: s2 @@ -1072,10 +1142,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -1093,10 +1191,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 @@ -1144,7 +1270,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -1157,7 +1288,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 PREHOOK: query: FROM ( select key, value from src s0 @@ -1864,10 +2000,12 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 7 <- Union 2 (CONTAINS) + Map 9 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 4 <- Union 2 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Union 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -1903,7 +2041,7 @@ STAGE PLANS: Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: s1 @@ -1921,7 +2059,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 9 Map Operator Tree: TableScan alias: s2 @@ -1975,10 +2113,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 309 Data size: 84048 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -1996,10 +2162,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), 
_col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 @@ -2047,7 +2241,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -2060,7 +2259,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 PREHOOK: query: FROM ( select key, value from src s0 @@ -2758,10 +2962,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 5 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Union 3 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2782,7 +2988,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -2870,10 +3076,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -2891,6 +3125,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -2908,7 +3170,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -2921,7 +3188,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION all @@ -3613,10 +3885,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3637,7 +3911,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -3717,6 +3991,19 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -3738,6 +4025,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 34000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -3755,7 +4085,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -3768,7 +4103,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as 
string) as value from src s1 UNION distinct diff --git ql/src/test/results/clientpositive/llap/union4.q.out ql/src/test/results/clientpositive/llap/union4.q.out index 796bc60a36..747c280c07 100644 --- ql/src/test/results/clientpositive/llap/union4.q.out +++ ql/src/test/results/clientpositive/llap/union4.q.out @@ -30,7 +30,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -51,7 +52,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: s2 @@ -93,7 +94,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -117,6 +146,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 @@ -134,7 +176,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 diff --git 
ql/src/test/results/clientpositive/llap/union6.q.out ql/src/test/results/clientpositive/llap/union6.q.out index 4043e3cb87..068ec75719 100644 --- ql/src/test/results/clientpositive/llap/union6.q.out +++ ql/src/test/results/clientpositive/llap/union6.q.out @@ -29,8 +29,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 4 <- Union 3 (CONTAINS) + Map 5 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -51,7 +52,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: s2 @@ -68,6 +69,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -90,6 +104,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -107,7 +149,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 diff --git ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index 
c35328031d..81019f7330 100644 --- ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -33,11 +33,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) - Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) - Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -62,7 +63,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 8 + Map 9 Map Operator Tree: TableScan alias: s2 @@ -80,6 +81,28 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst2' (type: string), _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -140,29 +163,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst3' (type: string), _col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: 
bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -171,7 +200,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 'tst2' (type: string), _col0 (type: bigint) + expressions: 'tst3' (type: string), _col0 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -203,7 +232,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 @@ -480,11 +514,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -505,13 +540,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 10 Map Operator Tree: TableScan - alias: s2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + alias: s3 + Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -523,13 +558,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 9 + Map 8 Map Operator Tree: TableScan - alias: s3 - Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE + alias: s2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash @@ -541,7 +576,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 10 + Reducer 11 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -623,7 +658,35 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable12 - Reducer 8 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -664,7 +727,12 @@ STAGE PLANS: name: default.tmptable12 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable12 PREHOOK: query: insert overwrite table tmptable12 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 @@ -2726,10 +2794,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2750,7 +2820,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -2830,6 +2900,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -2851,6 +2934,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + 
expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 34000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -2868,7 +2994,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -2881,7 +3012,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION DISTINCT @@ -3585,9 +3721,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Union 3 (CONTAINS) + Map 7 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3608,7 +3746,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -3668,6 +3806,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest118 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: 
COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE @@ -3679,6 +3830,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest218 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -3696,7 +3890,12 @@ STAGE PLANS: name: default.dest118 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest118 Stage: Stage-1 Move Operator @@ -3709,7 +3908,12 @@ STAGE PLANS: name: default.dest218 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest218 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION DISTINCT @@ -4413,9 +4617,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Union 3 (CONTAINS) + Map 7 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 4 
(CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4436,7 +4642,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -4505,6 +4711,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest119 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -4517,6 +4736,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest219 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -4534,7 +4796,12 @@ STAGE PLANS: name: default.dest119 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: 
string, string + Table: default.dest119 Stage: Stage-1 Move Operator @@ -4547,7 +4814,12 @@ name: default.dest219 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest219 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION DISTINCT
@@ -6467,34 +6739,59 @@ STAGE PLANS: Edges: Map 1 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: dst_union22_delta - Statistics: Num rows: 500 Data size: 454856 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 221500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (UDFToDouble(k0) <= 50.0) (type: boolean) - Statistics: Num rows: 166 Data size: 151012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 73538 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: k1 (type: string), k2 (type: string), k3 (type: string), k4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 166 Data size: 151012 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 348 Data size: 248973 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: aaaa sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 348 Data size: 248973 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Execution mode: llap
@@ -6510,7 +6807,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5
@@ -6551,26 +6848,42 @@ name: default.dst_union22_delta Truncated Path -> Alias: /dst_union22_delta/ds=1 [dst_union22_delta] - Map 4 + Map 5 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 278292 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (UDFToDouble(k1) > 20.0) (type: boolean) - Statistics: Num rows: 166 Data size: 89056 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: k1 (type: string), k2 (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 89056 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 89056 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string), _col2 (type: string) auto parallelism: true
@@ -6587,7 +6900,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4
@@ -6628,26 +6941,46 @@ name: default.dst_union22 Truncated Path -> Alias: /dst_union22/ds=1 [a] - Map 6 - Map Operator Tree: - TableScan - alias: dst_union22_delta - Statistics: Num rows: 500 Data size: 367272 Basic stats: COMPLETE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: dst_union22_delta + Statistics: Num rows: 500 Data size: 176000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(k0) > 50.0) and (UDFToDouble(k1) > 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 40399 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: k1 (type: string), k3 (type: string), k4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 55 Data size: 40399 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 40399 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string), _col2 (type: string) auto parallelism: true
@@ -6664,7 +6997,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5
@@ -6713,14 +7046,22 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 174 Data size: 124486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2/ - Statistics: Num rows: 174 Data size: 124486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat
@@ -6744,15 +7085,74 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 5 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - filter mappings: - 0 [1, 1] + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 332 Data size: 146412 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 2053 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: true + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2005 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2005 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 6 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + filter mappings: + 0 [1, 1] filter predicates: 0 {(VALUE._col1 = '1')} 1
@@ -6761,22 +7161,38 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col4, _col5 Position of Big Table: 0 - Statistics: Num rows: 182 Data size: 97961 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 97961 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 348 Data size: 248973 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: aaaa sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 348 Data size: 248973 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Union 2
@@ -6813,8 +7229,14 @@ STAGE PLANS: name: default.dst_union22 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: k1, k2, k3, k4 + Column Types: string, string, string, string + Table: default.dst_union22 + Is Table Level Stats: false PREHOOK: query: insert overwrite table dst_union22 partition (ds='2') select * from
@@ -9225,54 +9647,86 @@ STAGE PLANS: keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 360960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 360960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 360960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 360960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3
@@ -9282,20 +9736,20 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 375 Data size: 69750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 65472 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 375 Data size: 69750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 65472 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 375 Data size: 69750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 65472 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -9308,30 +9762,46 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 180480 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 90240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 35670 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 Union 5
@@ -9350,7 +9820,8 @@ STAGE PLANS: name: default.tmp_unionall Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator
@@ -10853,10 +11324,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) + Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 7 <- Union 2 (CONTAINS), Union 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 7 (CONTAINS) + Reducer 8 <- Union 2 (CONTAINS), Union 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1
@@ -10880,7 +11352,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src
@@ -10903,7 +11375,7 @@ STAGE PLANS: value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 8 + Map 9 Map Operator Tree: TableScan alias: src
@@ -10926,6 +11398,25 @@ STAGE PLANS: value expressions: _col2 (type: bigint)
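The new vertices in these plans all follow one shape: a Select Operator re-projects the inserted rows under the target table's column names, a map-side Group By Operator in mode: hash folds each column into compute_stats(col, 16), and a single-task reducer fed over a CUSTOM_SIMPLE_EDGE merges the partial sketches in mode: mergepartial before the Stats Work stage persists them. A minimal HiveQL sketch that should reproduce this plan shape — hedged: stats_demo is a hypothetical table name, and the session is assumed to run with the new hive.stats.column.autogather default introduced by this patch:

    -- enable column-stats autogather explicitly (the new default)
    SET hive.stats.column.autogather=true;
    CREATE TABLE stats_demo (key STRING, value STRING);
    -- the plan should show compute_stats(key, 16) / compute_stats(value, 16)
    -- in a hash-mode Group By, one mergepartial reducer, and a Stats Work
    -- stage where older plans printed Stats-Aggr Operator
    EXPLAIN INSERT OVERWRITE TABLE stats_demo SELECT key, value FROM src;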
Execution mode: llap LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -10946,7 +11437,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -10965,7 +11484,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -10987,29 +11506,10 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 93000 Basic stats: 
COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 - Union 6 - Vertex: Union 6 + Union 7 + Vertex: Union 7 Stage: Stage-2 Dependency Collection @@ -11025,7 +11525,12 @@ STAGE PLANS: name: default.union_subq_union Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union PREHOOK: query: insert overwrite table union_subq_union select * from ( @@ -11131,10 +11636,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 4 <- Union 5 (CONTAINS) - Map 7 <- Union 5 (CONTAINS) + Map 5 <- Union 6 (CONTAINS) + Map 8 <- Union 6 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 6 <- Union 2 (CONTAINS), Union 5 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Union 2 (CONTAINS), Union 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -11158,7 +11664,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -11179,7 +11685,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: src @@ -11220,7 +11726,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union29 - Reducer 6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -11240,8 +11774,8 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 - Union 5 - Vertex: Union 5 + Union 6 + Vertex: Union 6 Stage: Stage-2 Dependency Collection @@ -11257,7 +11791,12 @@ STAGE PLANS: name: default.union_subq_union29 Stage: Stage-3 - 
Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union29 PREHOOK: query: insert overwrite table union_subq_union29 select * from ( @@ -11668,12 +12207,13 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 12 <- Union 4 (CONTAINS) - Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 8 (CONTAINS) + Map 13 <- Union 4 (CONTAINS) + Reducer 10 <- Union 2 (CONTAINS), Union 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 5 <- Union 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 8 (CONTAINS) - Reducer 9 <- Union 2 (CONTAINS), Union 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -11697,7 +12237,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 10 + Map 11 Map Operator Tree: TableScan alias: src @@ -11720,7 +12260,7 @@ STAGE PLANS: value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 12 + Map 13 Map Operator Tree: TableScan alias: src @@ -11741,7 +12281,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -11764,7 +12304,29 @@ STAGE PLANS: value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 11 + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -11821,7 +12383,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union30 - Reducer 7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 
(type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -11840,34 +12430,12 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 Union 4 Vertex: Union 4 - Union 8 - Vertex: Union 8 + Union 9 + Vertex: Union 9 Stage: Stage-2 Dependency Collection @@ -11883,7 +12451,12 @@ STAGE PLANS: name: default.union_subq_union30 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union30 PREHOOK: query: insert overwrite table union_subq_union30 select * from ( @@ -12041,9 +12614,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) + Map 7 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -12067,7 +12642,7 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: t2 @@ -12118,6 +12693,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string) outputColumnNames: _col1 @@ -12138,6 +12726,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -12155,6 +12758,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -12172,7 +12803,12 @@ STAGE PLANS: name: default.t3 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.t3 Stage: Stage-1 Move Operator @@ -12185,7 +12821,12 @@ STAGE PLANS: name: default.t4 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: value, cnt + Column Types: string, int + Table: default.t4 PREHOOK: query: from (select * from t1 @@ -12303,7 +12944,9 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -12329,7 +12972,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) 
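The t3/t4 plan just above, and the t5/t6 and t7/t8 plans that follow, show the multi-insert case: each INSERT branch gets its own compute_stats hash/mergepartial reducer pair and its own Stats Work stage (Stage-4 for default.t3 and Stage-5 for default.t4 above). A hedged sketch of that query shape, mirroring the test's t1/t2 sources:

    -- multi-insert: one column-stats pipeline per target table
    -- (illustrative; the tests store the counts in INT columns)
    FROM (SELECT * FROM t1 UNION DISTINCT SELECT * FROM t2) u
    INSERT OVERWRITE TABLE t3 SELECT u.key, COUNT(1) GROUP BY u.key
    INSERT OVERWRITE TABLE t4 SELECT u.value, COUNT(1) GROUP BY u.value;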
Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: t2 @@ -12408,6 +13051,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -12426,10 +13082,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -12462,7 +13161,12 @@ STAGE PLANS: name: default.t5 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t5 Stage: Stage-1 Move Operator @@ -12475,7 +13179,12 @@ STAGE PLANS: name: default.t6 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: 
default.t6 PREHOOK: query: from ( @@ -12632,10 +13341,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -12661,7 +13372,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: t2 @@ -12741,6 +13452,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -12759,6 +13483,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -12776,7 +13543,12 @@ STAGE PLANS: name: default.t7 Stage: 
Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t7 Stage: Stage-1 Move Operator @@ -12789,7 +13561,12 @@ STAGE PLANS: name: default.t8 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t8 PREHOOK: query: from ( @@ -13650,7 +14427,8 @@ STAGE PLANS: Edges: Map 1 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -13677,7 +14455,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 67750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -13716,7 +14494,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -13756,7 +14562,12 @@ STAGE PLANS: name: default.test_src Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( @@ -13820,9 +14631,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Union 3 (CONTAINS) + Map 6 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -13848,7 +14660,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -13911,6 +14723,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Union 3 Vertex: Union 3 @@ -13928,7 +14768,12 @@ STAGE PLANS: name: default.test_src Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src
PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( @@ -14095,6 +14940,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src10_1
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator condition map: Inner Join 0 to 1 @@ -14112,23 +14968,45 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 4
- Statistics: Num rows: 11 Data size: 4048 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1
- Statistics: Num rows: 21 Data size: 7728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 21 Data size: 7728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: src10_2
- Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: src10_3
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan alias: src10_4
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -14193,10 +15108,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -14208,17 +15130,29 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1
- Statistics: Num rows: 21 Data size: 7728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 21 Data size: 7728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
Union 2 Vertex: Union 2 Union 6 @@ -14296,6 +15230,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src10_1
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs
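A note on the churn in the src10 plans above and below: with column statistics gathered automatically, each INSERT branch in these plans grows a compute_stats subtree (a Select over the inserted columns, a hash-mode Group By producing one partial-stats row per task, and a mergepartial reducer whose output feeds the Stats Work stage), and table scans over previously loaded data then report "Column stats: COMPLETE" where they used to report "NONE". A minimal sketch of how such a plan change is reproduced, assuming the src10_* tables have the string key/value columns these plans suggest:

    -- hedged repro sketch; table names and schema are assumptions taken
    -- from the plans in this file, not a prescribed test recipe
    SET hive.stats.column.autogather=true;
    INSERT OVERWRITE TABLE src10_1 SELECT key, value FROM src LIMIT 10;
    -- the INSERT plan now carries the compute_stats branch, and a later
    -- EXPLAIN against the table reports "Column stats: COMPLETE"
    EXPLAIN
    SELECT a.key, a.value FROM src10_1 a JOIN src10_2 b ON (a.key = b.key);

The same end state can be reached explicitly with ANALYZE TABLE src10_1 COMPUTE STATISTICS FOR COLUMNS; the autogather path simply piggybacks that work on the insert itself.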
@@ -14316,6 +15265,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src10_2
- Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 10 Data size: 1840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: src10_3
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap LLAP IO: no inputs Map 9 Map Operator Tree: TableScan alias: src10_4
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 20 Data size: 7360 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -14383,17 +15379,29 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 4048 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1
- Statistics: Num rows: 21 Data size: 7728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 21 Data size: 7728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -14401,10 +15409,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -14416,17 +15431,29 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1
- Statistics: Num rows: 21 Data size: 7728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 21 Data size: 7728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
Union 3 Vertex: Union 3 Union 7 @@ -14511,7 +15538,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -
Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -14532,7 +15560,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: s2 @@ -14592,7 +15620,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -14631,7 +15687,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 @@ -14850,9 +15911,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Union 3 (CONTAINS) + Map 6 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -14873,7 +15935,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: s2 @@ -14932,6 +15994,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 13 Data size: 3536 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), 
_col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -14949,7 +16039,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 diff --git ql/src/test/results/clientpositive/llap/union_stats.q.out ql/src/test/results/clientpositive/llap/union_stats.q.out index 1f3dc82e15..8e460ab186 100644 --- ql/src/test/results/clientpositive/llap/union_stats.q.out +++ ql/src/test/results/clientpositive/llap/union_stats.q.out @@ -202,7 +202,8 @@ STAGE PLANS: name: default.t Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### Stage: Stage-0 @@ -425,7 +426,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1000 rawDataSize 10624 @@ -460,7 +461,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1000 rawDataSize 10624 diff --git ql/src/test/results/clientpositive/llap/union_top_level.q.out ql/src/test/results/clientpositive/llap/union_top_level.q.out index cfbc069869..31b17553a0 100644 --- ql/src/test/results/clientpositive/llap/union_top_level.q.out +++ ql/src/test/results/clientpositive/llap/union_top_level.q.out @@ -562,7 +562,8 @@ STAGE PLANS: name: default.union_top Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator @@ -643,8 +644,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -665,11 +667,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -687,11 +688,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: 
COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -709,7 +709,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -735,7 +734,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -757,7 +784,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 8 Execution mode: llap Reduce Operator Tree: Select Operator @@ -779,6 +819,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + 
value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 @@ -796,7 +849,12 @@ STAGE PLANS: name: default.union_top Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top PREHOOK: query: insert into table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a @@ -863,8 +921,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -885,11 +944,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -907,11 +965,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -929,7 +986,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -955,7 +1011,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -977,7 +1061,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 7 + Select Operator + 
expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 8 Execution mode: llap Reduce Operator Tree: Select Operator @@ -999,6 +1096,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 @@ -1016,7 +1126,12 @@ STAGE PLANS: name: default.union_top Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top PREHOOK: query: insert overwrite table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a diff --git ql/src/test/results/clientpositive/llap/vector_bround.q.out ql/src/test/results/clientpositive/llap/vector_bround.q.out index d463f1a6e7..d6bdc31ece 100644 --- ql/src/test/results/clientpositive/llap/vector_bround.q.out +++ ql/src/test/results/clientpositive/llap/vector_bround.q.out @@ -47,7 +47,7 @@ Stage-0 Select Operator [SEL_3] (rows=8 width=16) Output:["_col0","_col1"] TableScan [TS_0] (rows=8 width=16) - default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:NONE,Output:["v0","v1"] + default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:COMPLETE,Output:["v0","v1"] PREHOOK: query: select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_char_simple.q.out ql/src/test/results/clientpositive/llap/vector_char_simple.q.out index 47c709f559..802229fda9 100644 --- ql/src/test/results/clientpositive/llap/vector_char_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_simple.q.out @@ -239,6 +239,7 @@ STAGE PLANS: Tez Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: @@ -265,28 +266,21 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator 
Tree: + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: - Select Vectorization: - className: VectorSelectOperator - native: true - Limit Vectorization: - className: VectorLimitOperator - native: true - Select Vectorization: - className: VectorSelectOperator - native: true - selectExpressions: CastLongToChar(col 0, maxLength 12) -> 1:Char - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Stage: Stage-2 diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out index 85ddc7cc8d..de42651b67 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -53,11 +53,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: m - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -67,14 +67,14 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -85,16 +85,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: n - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint), attr (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index 
5a42218e73..7380194b0d 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -838,7 +838,8 @@ STAGE PLANS: name: default.orc_create_complex Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product PREHOOK: query: INSERT INTO TABLE orc_create_complex diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 76925cda73..5c0e685678 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -220,7 +220,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2b - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -230,7 +230,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: a is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 diff --git ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out index 947ac81001..a894e21228 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out @@ -43,15 +43,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -60,10 +60,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -106,15 +106,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -123,10 +123,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,15 +169,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -186,10 +186,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -232,15 +232,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -249,10 +249,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic 
stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -295,15 +295,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -312,10 +312,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -358,15 +358,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -375,10 +375,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -421,15 +421,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToDouble(t) 
(type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -438,10 +438,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -484,15 +484,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -501,10 +501,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -558,15 +558,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -575,10 +575,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 
Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -621,15 +621,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -638,10 +638,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -684,15 +684,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -701,10 +701,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -747,15 +747,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -764,10 +764,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -810,15 +810,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -827,10 +827,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -873,15 +873,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -890,10 +890,10 @@ STAGE PLANS: Select Operator expressions: 
KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -936,15 +936,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -953,10 +953,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -999,15 +999,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1016,10 +1016,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out 
ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out index a3bf091fc1..0686bf0492 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out @@ -58,7 +58,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_1_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -70,7 +70,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 5:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 13:decimal(21,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -78,7 +78,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -107,13 +107,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -209,7 +209,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_2_orc - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -221,7 +221,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 4:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 5:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 6:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 7:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 11:decimal(21,0), FuncRoundDecimalToDecimal(col 1) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 0) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 1) -> 14:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 2) -> 15:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 3) -> 16:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 4) -> 17:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -1) -> 18:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -2) -> 19:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -3) -> 20:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -4) -> 21:decimal(21,0) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -229,7 +229,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: 
decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -258,13 +258,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -387,7 +387,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_3_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -399,7 +399,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -15) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -16) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -13) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -14) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -11) -> 5:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -12) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -9) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -10) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 14:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 15:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 16:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 17:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 18:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 19:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 20:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 21:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 5) -> 22:decimal(26,5), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 6) -> 23:decimal(27,6), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 7) -> 24:decimal(28,7), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 8) -> 25:decimal(29,8), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 26:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 10) -> 27:decimal(31,10), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 11) -> 28:decimal(32,11), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 12) -> 29:decimal(33,12), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 13) -> 30:decimal(34,13), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 14) -> 31:decimal(35,14), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 15) -> 32:decimal(36,15), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 16) -> 33:decimal(37,16) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -407,7 +407,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col30 (type: decimal(35,14)), _col31 (type: decimal(36,15)), _col32 (type: decimal(37,16)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -436,13 +436,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -554,7 +554,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_4_orc - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats:
COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -566,7 +566,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 2:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 9) -> 3:decimal(30,9) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(30,9)) sort order: + @@ -574,7 +574,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -604,13 +604,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 3] selectExpressions: ConstantVectorExpression(val 1809242.315111134) -> 2:decimal(17,9), ConstantVectorExpression(val -1809242.315111134) -> 3:decimal(17,9) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby4.q.out ql/src/test/results/clientpositive/llap/vector_groupby4.q.out index bf2a366284..bc5bd07d6a 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby4.q.out @@ -43,6 +43,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -147,6 +149,60 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE + 
value expressions: c1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -162,7 +218,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) diff --git ql/src/test/results/clientpositive/llap/vector_groupby6.q.out ql/src/test/results/clientpositive/llap/vector_groupby6.q.out index 9fa46fb976..a6e24fc97e 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby6.q.out @@ -43,6 +43,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -147,6 +149,60 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 250 Data size: 43792 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 4 + 
Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -162,7 +218,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 6b2ee48fdf..67deb43320 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -716,8 +716,10 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -820,10 +822,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator 
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: groupByMode: PARTIALS @@ -841,7 +871,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -869,6 +899,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -884,7 +942,12 @@ STAGE PLANS: name: default.t2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 Stage: Stage-1 Move Operator @@ -897,7 +960,12 @@ STAGE PLANS: name: default.t3 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index 5d0b23c7c7..a90388af49 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -50,22 +50,22 @@ STAGE 
PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: category (type: int), live (type: int), comments (type: int) outputColumnNames: category, live, comments - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(live), max(comments) keys: category (type: int), 0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -77,16 +77,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true Filter Operator predicate: (_col3 > 0) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reducer 3 Execution mode: vectorized, llap @@ -94,7 +94,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int) outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -115,14 +115,14 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index 0e9d6a6c09..b22e33496b 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -441,8 +441,10 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -500,7 +502,7 @@ STAGE PLANS: Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -521,7 +523,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 9 Data size: 3312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -536,8 +566,8 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 - Execution mode: vectorized, llap + Reducer 6 + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -558,6 +588,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 9 Data size: 3312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash 
+ outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -573,7 +631,12 @@ STAGE PLANS: name: default.t2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 Stage: Stage-1 Move Operator @@ -586,7 +649,12 @@ STAGE PLANS: name: default.t3 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup diff --git ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out index 1c86c0cf26..96e293ffa7 100644 --- ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out +++ ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out @@ -93,6 +93,11 @@ STAGE PLANS: Stage: Stage-3 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -114,6 +119,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -129,6 +147,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn2 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 428 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: (rn >= 1000) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -144,16 +175,87 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not 
supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -169,7 +271,12 @@ STAGE PLANS: name: default.orc_rn1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn1 Stage: Stage-1 Move Operator @@ -182,7 +289,12 @@ STAGE PLANS: name: default.orc_rn2 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn2 Stage: Stage-2 Move Operator @@ -195,7 +307,12 @@ STAGE PLANS: name: default.orc_rn3 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn3 PREHOOK: query: from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 diff --git ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out index e596cf2aab..67fa49734e 100644 --- ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out @@ -52,6 +52,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -70,8 +73,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -87,7 +118,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 
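All of the golden-file changes above follow one pattern: with column-stats autogather enabled, each INSERT OVERWRITE plan grows a compute_stats branch (a Select over the sink's output columns, a hash-mode Group By, a shuffle into a mergepartial or final Group By, and a File Sink for the stats row), and the former "Stats-Aggr Operator" stage becomes a "Stats Work" stage carrying a Column Stats Desc. A minimal HiveQL sketch for observing this on the vector_udf_character_length case; the SET line and the EXPLAIN wrapper are illustrative additions, not part of the test, and assume a session against the same src1/dest1 test tables:

    SET hive.stats.column.autogather=true;
    -- Same query as the test that follows; the plan should now show the
    -- extra compute_stats reducer and a Stats Work stage for default.dest1.
    EXPLAIN
    FROM src1 INSERT OVERWRITE TABLE dest1
    SELECT character_length(src1.value);
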
PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out index b156dc68e7..4f554f38d8 100644 --- ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out @@ -35,6 +35,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -53,8 +56,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -70,7 +101,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index d4b9a1cafd..cd6e61cf99 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -127,7 +127,8 @@ STAGE PLANS: name: default.srcpart_date Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/llap_acid.q.out ql/src/test/results/clientpositive/llap_acid.q.out index 5970fd78cb..e04af0e344 100644 --- ql/src/test/results/clientpositive/llap_acid.q.out +++ ql/src/test/results/clientpositive/llap_acid.q.out @@ -91,18 +91,18 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: int) sort order: ++ - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized LLAP IO: may be used (ACID table) @@ -110,10 +110,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -141,24 +141,26 @@ POSTHOOK: Input: default@orc_llap@csmallint=1 POSTHOOK: Input: default@orc_llap@csmallint=2 POSTHOOK: Input: default@orc_llap@csmallint=3 #### A masked pattern was here #### --285355633 1 -1241163445 --109813638 1 -58941842 -164554497 1 1161977292 -199879534 1 123351087 -246423894 1 -1645852809 -354670578 1 562841852 -455419170 1 1108177470 -665801232 1 480783141 -708885482 1 -1645852809 --285355633 2 -1241163445 --109813638 2 -58941842 -164554497 2 1161977292 -199879534 2 123351087 -246423894 2 -1645852809 -354670578 2 562841852 -455419170 2 1108177470 -665801232 2 480783141 -708885482 2 -1645852809 +-970918963 1 -588508542 +-734267047 1 895807844 +-548534304 1 -1900081338 +-546972460 1 665899329 +-469581869 1 1033373031 +-101217409 1 183045850 +-37908611 1 -1378658304 +274816197 1 -437339127 +371876492 1 1862746855 +927956889 1 -935575737 +-970918963 2 -588508542 +-734267047 2 895807844 +-548534304 2 -1900081338 +-546972460 2 665899329 +-469581869 2 1033373031 +-101217409 2 183045850 +-37908611 2 -1378658304 +274816197 2 -437339127 +371876492 2 1862746855 +927956889 2 -935575737 -923308739 3 -1887561756 -3728 3 -1887561756 762 3 -1645852809 @@ -216,18 +218,18 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 
(type: int) sort order: ++ - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized LLAP IO: may be used (ACID table) @@ -235,10 +237,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -266,25 +268,27 @@ POSTHOOK: Input: default@orc_llap@csmallint=1 POSTHOOK: Input: default@orc_llap@csmallint=2 POSTHOOK: Input: default@orc_llap@csmallint=3 #### A masked pattern was here #### --285355633 1 -1241163445 --109813638 1 -58941842 +-970918963 1 -588508542 +-734267047 1 895807844 +-548534304 1 -1900081338 +-546972460 1 665899329 +-469581869 1 1033373031 +-101217409 1 183045850 +-37908611 1 -1378658304 1 1 2 -164554497 1 1161977292 -199879534 1 123351087 -246423894 1 -1645852809 -354670578 1 562841852 -455419170 1 1108177470 -665801232 1 480783141 -708885482 1 -1645852809 --285355633 2 -1241163445 --109813638 2 -58941842 -164554497 2 1161977292 -199879534 2 123351087 -246423894 2 -1645852809 -354670578 2 562841852 -455419170 2 1108177470 -665801232 2 480783141 -708885482 2 -1645852809 +274816197 1 -437339127 +371876492 1 1862746855 +927956889 1 -935575737 +-970918963 2 -588508542 +-734267047 2 895807844 +-548534304 2 -1900081338 +-546972460 2 665899329 +-469581869 2 1033373031 +-101217409 2 183045850 +-37908611 2 -1378658304 +274816197 2 -437339127 +371876492 2 1862746855 +927956889 2 -935575737 -923308739 3 -1887561756 -3728 3 -1887561756 762 3 -1645852809 diff --git ql/src/test/results/clientpositive/load_dyn_part1.q.out ql/src/test/results/clientpositive/load_dyn_part1.q.out index 84d806d3a9..2bdde45f84 100644 --- ql/src/test/results/clientpositive/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/load_dyn_part1.q.out @@ -57,17 +57,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -91,6 +87,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num 
rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -106,6 +118,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -130,7 +176,12 @@ STAGE PLANS: name: default.nzhang_part1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 Stage: Stage-4 Map Reduce @@ -162,15 +213,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -185,37 +227,41 @@ STAGE PLANS: name: default.nzhang_part2 Stage: Stage-9 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 Stage: Stage-10 Map Reduce Map Operator 
Tree: TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/load_dyn_part10.q.out ql/src/test/results/clientpositive/load_dyn_part10.q.out index 99d357217d..ec993231c0 100644 --- ql/src/test/results/clientpositive/load_dyn_part10.q.out +++ ql/src/test/results/clientpositive/load_dyn_part10.q.out @@ -66,6 +66,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part10 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: 
mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,7 +115,12 @@ STAGE PLANS: name: default.nzhang_part10 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part10 PREHOOK: query: from srcpart insert overwrite table nzhang_part10 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git ql/src/test/results/clientpositive/load_dyn_part13.q.out ql/src/test/results/clientpositive/load_dyn_part13.q.out index 9e0ac6fee2..a00258e1c1 100644 --- ql/src/test/results/clientpositive/load_dyn_part13.q.out +++ ql/src/test/results/clientpositive/load_dyn_part13.q.out @@ -85,6 +85,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -105,6 +121,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,7 +170,12 @@ STAGE PLANS: name: default.nzhang_part13 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part13 PREHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr) select * from ( diff --git ql/src/test/results/clientpositive/load_dyn_part14.q.out ql/src/test/results/clientpositive/load_dyn_part14.q.out index a6a5c63cc5..2eb8450ef8 100644 --- ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -71,7 +71,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 @@ -101,6 +100,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE @@ -112,6 +127,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + 
+ Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE @@ -123,6 +154,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -146,7 +211,12 @@ STAGE PLANS: name: default.nzhang_part14 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part14 Stage: Stage-4 Map Reduce @@ -192,7 +262,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 @@ -222,7 +291,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 diff --git ql/src/test/results/clientpositive/load_dyn_part2.q.out ql/src/test/results/clientpositive/load_dyn_part2.q.out index 93778a22e3..18c31350e3 100644 --- ql/src/test/results/clientpositive/load_dyn_part2.q.out +++ ql/src/test/results/clientpositive/load_dyn_part2.q.out @@ -37,7 +37,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -68,6 +69,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: 
string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -83,7 +100,41 @@ STAGE PLANS: name: default.nzhang_part_bucket Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/load_dyn_part3.q.out ql/src/test/results/clientpositive/load_dyn_part3.q.out index 3849100785..bf06701e07 100644 --- ql/src/test/results/clientpositive/load_dyn_part3.q.out +++ ql/src/test/results/clientpositive/load_dyn_part3.q.out @@ -64,6 +64,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort 
order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,7 +113,12 @@ STAGE PLANS: name: default.nzhang_part3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part3 PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/load_dyn_part4.q.out ql/src/test/results/clientpositive/load_dyn_part4.q.out index 40b0bbbe8c..43219c3fb9 100644 --- ql/src/test/results/clientpositive/load_dyn_part4.q.out +++ ql/src/test/results/clientpositive/load_dyn_part4.q.out @@ -74,6 +74,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,7 +123,12 @@ STAGE PLANS: name: default.nzhang_part4 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part4 PREHOOK: query: insert overwrite table nzhang_part4 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/load_dyn_part8.q.out ql/src/test/results/clientpositive/load_dyn_part8.q.out index cb1a757051..3afd07a560 100644 --- ql/src/test/results/clientpositive/load_dyn_part8.q.out +++ ql/src/test/results/clientpositive/load_dyn_part8.q.out @@ -49,7 +49,8 @@ STAGE DEPENDENCIES: Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-1, Stage-5 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -96,6 +97,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -134,6 +154,34 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + 
GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -334,6 +382,40 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -364,7 +446,8 @@ STAGE PLANS: name: default.nzhang_part8 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### Stage: Stage-1 @@ -396,8 +479,90 @@ STAGE PLANS: name: default.nzhang_part8 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: from srcpart insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/load_dyn_part9.q.out ql/src/test/results/clientpositive/load_dyn_part9.q.out index 414e784309..fc4b66165c 100644 --- ql/src/test/results/clientpositive/load_dyn_part9.q.out +++ ql/src/test/results/clientpositive/load_dyn_part9.q.out @@ -66,6 +66,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, 
_col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,7 +115,12 @@ STAGE PLANS: name: default.nzhang_part9 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part9 PREHOOK: query: from srcpart insert overwrite table nzhang_part9 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/mapjoin_hook.q.out ql/src/test/results/clientpositive/mapjoin_hook.q.out index a9f9be3a4d..ae05a725fd 100644 --- ql/src/test/results/clientpositive/mapjoin_hook.q.out +++ ql/src/test/results/clientpositive/mapjoin_hook.q.out @@ -13,20 +13,22 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-6:MAPREDLOCAL +RUN: Stage-7:MAPREDLOCAL RUN: Stage-2:MAPRED RUN: Stage-0:MOVE -RUN: Stage-3:STATS +RUN: Stage-4:MAPRED +RUN: Stage-3:COLUMNSTATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-6:MAPREDLOCAL -RUN: Stage-5:MAPRED +RUN: Stage-7:MAPREDLOCAL +RUN: Stage-6:MAPRED RUN: Stage-0:MOVE -RUN: Stage-2:STATS +RUN: Stage-3:MAPRED +RUN: Stage-2:COLUMNSTATS PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') @@ -41,11 +43,12 @@ PREHOOK: Output: default@dest1 FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 1 BACKUP_COMMON_JOIN: 1 -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL RUN: Stage-1:MAPRED RUN: Stage-0:MOVE -RUN: Stage-2:STATS +RUN: Stage-3:MAPRED +RUN: Stage-2:COLUMNSTATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY @@ -56,11 +59,12 @@ ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 2 BACKUP_COMMON_JOIN: 2 -RUN: Stage-11:CONDITIONAL 
-RUN: Stage-14:MAPREDLOCAL +RUN: Stage-12:CONDITIONAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-8:CONDITIONAL -RUN: Stage-12:MAPREDLOCAL +RUN: Stage-9:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL RUN: Stage-2:MAPRED RUN: Stage-0:MOVE -RUN: Stage-3:STATS +RUN: Stage-4:MAPRED +RUN: Stage-3:COLUMNSTATS diff --git ql/src/test/results/clientpositive/mapreduce1.q.out ql/src/test/results/clientpositive/mapreduce1.q.out index 3d0a156557..1c83fa74b6 100644 --- ql/src/test/results/clientpositive/mapreduce1.q.out +++ ql/src/test/results/clientpositive/mapreduce1.q.out @@ -25,7 +25,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -76,7 +92,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/mapreduce2.q.out ql/src/test/results/clientpositive/mapreduce2.q.out index 676c387c7a..b1c68b87d5 100644 --- ql/src/test/results/clientpositive/mapreduce2.q.out +++ ql/src/test/results/clientpositive/mapreduce2.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + 
Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,7 +89,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/mapreduce3.q.out ql/src/test/results/clientpositive/mapreduce3.q.out index fc1a402b68..b50cbc9665 100644 --- ql/src/test/results/clientpositive/mapreduce3.q.out +++ ql/src/test/results/clientpositive/mapreduce3.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,7 +89,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/mapreduce4.q.out ql/src/test/results/clientpositive/mapreduce4.q.out index 17fa029ad4..c61f37043b 100644 --- ql/src/test/results/clientpositive/mapreduce4.q.out +++ ql/src/test/results/clientpositive/mapreduce4.q.out @@ -25,7 +25,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -76,7 +92,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: 
struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/mapreduce5.q.out ql/src/test/results/clientpositive/mapreduce5.q.out index 21103f88df..a014fc0152 100644 --- ql/src/test/results/clientpositive/mapreduce5.q.out +++ ql/src/test/results/clientpositive/mapreduce5.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -67,7 +83,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git 
ql/src/test/results/clientpositive/mapreduce6.q.out ql/src/test/results/clientpositive/mapreduce6.q.out index fe4e631077..75bb1524b3 100644 --- ql/src/test/results/clientpositive/mapreduce6.q.out +++ ql/src/test/results/clientpositive/mapreduce6.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -67,7 +83,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1712 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/mapreduce7.q.out ql/src/test/results/clientpositive/mapreduce7.q.out index cc97887fd8..a342f5c49c 100644 --- ql/src/test/results/clientpositive/mapreduce7.q.out +++ ql/src/test/results/clientpositive/mapreduce7.q.out @@ -23,7 +23,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), 
_col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: k, v, key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k, 'hll'), compute_stats(v, 'hll'), compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,7 +89,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k, v, key, ten, one, value + Column Types: string, string, int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/mapreduce8.q.out ql/src/test/results/clientpositive/mapreduce8.q.out index b1763c792f..8a9567f66e 100644 --- ql/src/test/results/clientpositive/mapreduce8.q.out +++ ql/src/test/results/clientpositive/mapreduce8.q.out @@ -25,7 +25,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: k, v, key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k, 'hll'), compute_stats(v, 'hll'), compute_stats(key, 'hll'), compute_stats(ten, 'hll'), compute_stats(one, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2592 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -76,7 +92,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k, v, key, ten, one, value + Column Types: string, string, int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/masking_11.q.out ql/src/test/results/clientpositive/masking_11.q.out index f29c51fdd9..0941b8c2ab 100644 --- ql/src/test/results/clientpositive/masking_11.q.out +++ ql/src/test/results/clientpositive/masking_11.q.out @@ -13,8 +13,10 @@ POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value, t PREHOOK: query: analyze table `masking_test` compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@masking_test +PREHOOK: Output: default@masking_test #### A masked pattern was here #### POSTHOOK: query: analyze table `masking_test` compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@masking_test +POSTHOOK: Output: default@masking_test #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/merge1.q.out ql/src/test/results/clientpositive/merge1.q.out index 7423d83ac8..6534c6a477 100644 --- ql/src/test/results/clientpositive/merge1.q.out +++ ql/src/test/results/clientpositive/merge1.q.out @@ -19,10 +19,11 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), 
compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -87,7 +103,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.dest1 Stage: Stage-3 Map Reduce @@ -119,6 +140,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table dest1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -527,6 +570,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -548,7 +617,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 Stage: Stage-3 Map Reduce @@ -628,6 +702,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 
Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -649,7 +749,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/merge2.q.out ql/src/test/results/clientpositive/merge2.q.out index bbc55d8b9e..ae5f20bca8 100644 --- ql/src/test/results/clientpositive/merge2.q.out +++ ql/src/test/results/clientpositive/merge2.q.out @@ -19,10 +19,11 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -87,7 +103,12 @@ STAGE PLANS: name: default.test1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 Stage: Stage-3 Map Reduce @@ -119,6 +140,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table test1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -527,6 +570,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -548,7 +617,12 @@ STAGE PLANS: name: default.test1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 Stage: Stage-3 Map Reduce @@ -628,6 +702,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -649,7 +749,12 @@ STAGE PLANS: name: default.test1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 Stage: Stage-3 Map Reduce
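Note: in the merge1/merge2 plans above, the stats-merge stage (Stage-8) hangs off the original query stage (Stage-1), while the final Stats Work stage (Stage-2) now waits on both the Move of the merged data (Stage-0) and Stage-8, so column statistics are published only once the data files and the merged compute_stats sketches are both in place. A sketch that reproduces this shape against the test1 table used above; the merge setting is an assumption for illustration:

    -- hedged sketch; small-file merging plus autogathered column stats
    SET hive.merge.mapfiles=true;
    SET hive.stats.column.autogather=true;
    -- the plan should show a Conditional Operator for the file merge and a
    -- separate MapReduce stage that merges the stats sketches
    EXPLAIN INSERT OVERWRITE TABLE test1 SELECT key, count(1) FROM src GROUP BY key;

diff --git ql/src/test/results/clientpositive/merge3.q.out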
ql/src/test/results/clientpositive/merge3.q.out index 1eb5f0e03a..4888d97ba1 100644 --- ql/src/test/results/clientpositive/merge3.q.out +++ ql/src/test/results/clientpositive/merge3.q.out @@ -178,7 +178,8 @@ STAGE PLANS: name: default.merge_src2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### Stage: Stage-3 @@ -2421,6 +2422,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2432,7 +2452,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2479,7 +2499,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2521,6 +2541,40 @@ STAGE PLANS: Truncated Path -> Alias: /merge_src_part/ds=2008-04-08 [merge_src_part] /merge_src_part/ds=2008-04-09 [merge_src_part] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2559,8 +2613,14 @@ STAGE PLANS: name: default.merge_src_part2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work 
+ Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_src_part2 + Is Table Level Stats: false Stage: Stage-3 Map Reduce @@ -4814,8 +4874,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: + key expressions: _col2 (type: string) + null sort order: a + sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: -1 @@ -4832,7 +4893,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4879,7 +4940,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4956,6 +5017,42 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4994,8 +5091,14 @@ STAGE PLANS: name: default.merge_src_part2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_src_part2 + Is Table Level Stats: false Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/merge4.q.out ql/src/test/results/clientpositive/merge4.q.out index 182c6a887e..60d65e8e0b 100644 --- ql/src/test/results/clientpositive/merge4.q.out +++ ql/src/test/results/clientpositive/merge4.q.out @@ -41,6 +41,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -65,7 +99,12 @@ STAGE PLANS: name: default.nzhang_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part Stage: Stage-3 Map Reduce @@ -1161,6 +1200,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1185,7 +1258,12 @@ STAGE PLANS: name: default.nzhang_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part Stage: Stage-3 Map Reduce @@ -2762,10 +2840,11 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2786,7 +2865,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) TableScan alias: src @@ -2803,7 +2881,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Select Operator @@ -2821,6 +2898,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1 Data size: 353 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -2845,7 +2938,12 @@ STAGE PLANS: name: default.nzhang_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part Stage: Stage-3 Map Reduce @@ -2877,6 +2975,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: 
COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1063 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select * from ( select key, value, hr from srcpart where ds='2008-04-08' diff --git ql/src/test/results/clientpositive/merge_dynamic_partition.q.out ql/src/test/results/clientpositive/merge_dynamic_partition.q.out index a777fe0830..851ac02bac 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition.q.out @@ -77,6 +77,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -92,7 +126,12 @@ STAGE PLANS: name: default.merge_dynamic_part Stage: Stage-2 - 
Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part PREHOOK: query: insert overwrite table merge_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart_merge_dp where ds='2008-04-08' PREHOOK: type: QUERY @@ -668,6 +707,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -692,7 +765,12 @@ STAGE PLANS: name: default.merge_dynamic_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Map Reduce @@ -1298,6 +1376,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 
Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1322,7 +1434,12 @@ STAGE PLANS: name: default.merge_dynamic_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out index 5a2afb01ac..aec543e130 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out @@ -99,6 +99,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -123,7 +157,12 @@ STAGE PLANS: name: default.merge_dynamic_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out index 055e07abd8..076f9e8b7c 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out @@ -159,6 +159,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -183,7 +217,12 @@ STAGE PLANS: name: default.merge_dynamic_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index cbeaf42eaf..8b0b0e7672 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -160,6 +160,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select 
Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -184,7 +218,12 @@ STAGE PLANS: name: default.merge_dynamic_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 5a562f4456..f9ac40db58 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -136,6 +136,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -160,7 +194,12 @@ STAGE PLANS: name: default.merge_dynamic_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/metadata_only_queries.q.out ql/src/test/results/clientpositive/metadata_only_queries.q.out index 72a90fbf9d..0fe6684b82 100644 --- ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -395,40 +395,54 @@ STAGE PLANS: PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl +PREHOOK: Output: default@stats_tbl #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl +POSTHOOK: Output: default@stats_tbl #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: default@stats_tbl_part@dt=2010 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2010 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: default@stats_tbl_part@dt=2011 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2011 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2011 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2011 #### A masked pattern was here ####
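Note: the added PREHOOK/POSTHOOK Output lines in these blocks reflect that ANALYZE ... COMPUTE STATISTICS FOR COLUMNS now registers the analyzed table, and for a partition-level analyze the partition as well, as write entities, since the gathered column statistics are written back to those metastore objects. For example, the statement below (taken from the test above) now declares both outputs in its hooks:

    -- now lists default@stats_tbl_part and default@stats_tbl_part@dt=2011
    -- as Output entities in addition to the Input entities
    analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin;

PREHOOK: query: analyze table stats_tbl_part partition(dt='2012') compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: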
default@stats_tbl_part@dt=2012 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt='2012') compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2012 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### PREHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl diff --git ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out index 6376aa79f8..79d9d27203 100644 --- ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out +++ ql/src/test/results/clientpositive/metadata_only_queries_with_filters.q.out @@ -126,21 +126,29 @@ PREHOOK: query: analyze table stats_tbl_part partition(dt=2010) compute statisti PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: default@stats_tbl_part@dt=2010 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt=2010) compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2010 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt=2014) compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: default@stats_tbl_part@dt=2014 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2014 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt=2014) compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2014 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2014 #### A masked pattern was here #### PREHOOK: query: explain select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 diff --git ql/src/test/results/clientpositive/multi_insert_gby.q.out ql/src/test/results/clientpositive/multi_insert_gby.q.out index cb97e5871f..a9d87ff7e6 100644 --- ql/src/test/results/clientpositive/multi_insert_gby.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby.q.out @@ -29,9 +29,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-1, Stage-4, Stage-6 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -72,6 +74,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + 
outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -93,6 +110,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -105,7 +137,42 @@ STAGE PLANS: name: default.e1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 Stage: Stage-1 Move Operator @@ -117,8 +184,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data 
size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 @@ -210,9 +296,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-1, Stage-4, Stage-6 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-0, Stage-4, Stage-6 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -251,6 +339,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 450) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -272,6 +375,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -284,7 +402,42 @@ STAGE PLANS: name: default.e2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 Stage: Stage-0 Move Operator @@ -296,8 +449,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 - Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 diff --git ql/src/test/results/clientpositive/multi_insert_gby4.q.out ql/src/test/results/clientpositive/multi_insert_gby4.q.out index dd01b74da1..f7b1298c05 100644 --- ql/src/test/results/clientpositive/multi_insert_gby4.q.out +++ ql/src/test/results/clientpositive/multi_insert_gby4.q.out @@ -43,11 +43,14 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7, Stage-9 + Stage-8 depends on stages: Stage-2, Stage-5, Stage-7, Stage-9 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 Stage-2 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -92,6 +95,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -113,6 +131,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num 
rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 490) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -134,6 +167,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -146,7 +194,50 @@ STAGE PLANS: name: default.e1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-8 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e3 Stage: Stage-1 Move Operator @@ -158,8 +249,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -171,8 +281,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-6 - Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (SELECT key, value FROM src) a INSERT OVERWRITE TABLE e1 diff --git ql/src/test/results/clientpositive/multi_insert_mixed.q.out ql/src/test/results/clientpositive/multi_insert_mixed.q.out index 9acae2ec54..ef5cfbc8d8 100644 --- ql/src/test/results/clientpositive/multi_insert_mixed.q.out +++ ql/src/test/results/clientpositive/multi_insert_mixed.q.out @@ -38,13 +38,14 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-10 Stage-6 depends on stages: Stage-3 Stage-7 depends on stages: Stage-6 Stage-1 depends on stages: Stage-7 - Stage-8 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-1, Stage-10 Stage-2 depends on stages: Stage-3 - Stage-9 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2, Stage-10 + Stage-10 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -100,6 +101,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -136,6 +152,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -148,7 +184,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-6 Map Reduce @@ -196,6 +237,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -208,7 +269,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-8 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-2 Move Operator @@ -221,7 +287,34 @@ STAGE PLANS: name: default.src_multi3 Stage: Stage-9 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi3 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table src_multi1 select key, count(1) group by key order by key diff --git ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out index af0ef54a74..8cfcde256f 100644 --- ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out +++ ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out @@ -28,9 +28,10 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-6 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1, Stage-6 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -54,6 +55,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -69,6 +83,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -84,7 +126,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic 
Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -97,7 +144,34 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -166,19 +240,15 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-11 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-1, Stage-11 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -202,6 +272,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -217,6 +300,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -241,7 +352,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -254,7 +370,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-10 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-5 Map Reduce @@ -286,44 +407,27 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -390,11 +494,17 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 + 
Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-11 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1, Stage-11 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -418,6 +528,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -433,6 +556,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -448,7 +599,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -461,7 +617,73 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked 
pattern was here #### + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -532,9 +754,9 @@ STAGE DEPENDENCIES: Stage-6 Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-16 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-1, Stage-16 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -543,6 +765,7 @@ STAGE DEPENDENCIES: Stage-11 Stage-13 Stage-14 depends on stages: Stage-13 + Stage-16 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -566,6 +789,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -581,6 +817,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + 
Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -605,7 +869,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -618,7 +887,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-10 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-5 Map Reduce @@ -689,6 +963,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -756,9 +1052,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5, Stage-7 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -794,6 +1092,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: 
Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -810,6 +1123,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Dependency Collection @@ -825,7 +1153,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -837,8 +1170,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-5 - Stats-Aggr Operator + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: 
Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -900,19 +1282,21 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10, Stage-17 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-1, Stage-10, Stage-17 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 + Stage-10 depends on stages: Stage-2 + Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14 Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-12 + Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-17 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -948,6 +1332,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -964,6 +1363,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-9 Conditional Operator @@ -988,7 +1402,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: 
default.src_multi1 Stage: Stage-1 Move Operator @@ -1000,8 +1419,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-10 - Stats-Aggr Operator + Stage: Stage-11 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-5 Map Reduce @@ -1033,16 +1457,38 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-16 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -1054,7 +1500,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -1066,12 +1512,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-14 + Stage: Stage-15 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -1132,9 +1600,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5, Stage-7 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1170,6 +1640,21 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1186,6 +1671,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Dependency Collection @@ -1201,7 +1701,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1213,8 +1718,57 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator 
Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -1276,19 +1830,21 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10, Stage-17 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-1, Stage-10, Stage-17 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 + Stage-10 depends on stages: Stage-2 + Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14 Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-12 + Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-17 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1324,6 +1880,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1340,6 +1911,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-9 Conditional Operator @@ -1364,7 +1950,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1376,8 +1967,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-10 - Stats-Aggr Operator + Stage: Stage-11 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-5 Map Reduce @@ -1409,16 +2005,38 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-16 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Move Operator files: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -1430,7 +2048,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -1442,12 +2060,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Stage: Stage-14 + Stage: Stage-15 Move Operator files: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value @@ -1508,9 +2148,10 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-6 
Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1, Stage-6 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1536,6 +2177,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1547,6 +2201,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1567,6 +2236,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1578,6 +2260,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -1593,7 +2303,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1606,7 +2321,34 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1694,19 +2436,15 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7 Stage-6 - Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 + Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-11 Stage-1 depends on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-1, Stage-11 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1732,6 +2470,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: 
key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1743,6 +2494,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1763,6 +2529,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1774,6 +2553,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) 
+ mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -1798,7 +2605,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1811,7 +2623,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-10 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-5 Map Reduce @@ -1843,44 +2660,27 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1966,11 +2766,17 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-11 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1, Stage-11 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + 
Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1996,6 +2802,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2007,6 +2826,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2027,6 +2861,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2038,6 +2885,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -2053,7 +2928,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -2066,7 +2946,73 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -2156,9 +3102,9 @@ STAGE DEPENDENCIES: Stage-6 Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 Stage-0 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-16 Stage-1 depends 
on stages: Stage-4 - Stage-10 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-1, Stage-16 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -2167,6 +3113,7 @@ STAGE DEPENDENCIES: Stage-11 Stage-13 Stage-14 depends on stages: Stage-13 + Stage-16 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -2192,6 +3139,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2203,6 +3163,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2223,6 +3198,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2234,6 +3222,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -2258,7 +3274,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -2271,7 +3292,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-10 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-5 Map Reduce @@ -2342,6 +3368,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -3273,14 +4321,16 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-10 Stage-0 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-8, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-1, Stage-8, Stage-2, Stage-3 Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-9 + Stage-10 depends on stages: Stage-9 + Stage-3 depends on 
stages: Stage-10 STAGE PLANS: Stage: Stage-4 @@ -3304,6 +4354,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3319,46 +4382,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3374,7 +4434,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -3387,7 +4452,76 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -3395,7 +4529,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -3497,14 +4631,21 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-8, Stage-11, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-7 depends on stages: Stage-1, Stage-13, Stage-2, Stage-3 + Stage-12 depends on stages: Stage-4 , consists of Stage-9, Stage-8, Stage-10 + Stage-9 + Stage-8 + Stage-10 + Stage-11 depends on stages: Stage-10 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3528,6 +4669,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3543,46 +4697,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: 
NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3598,7 +4749,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -3611,7 +4767,115 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-12 + Conditional Operator + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-11 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -3619,7 +4883,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-15 Map Reduce Map Operator Tree: TableScan @@ -3723,22 +4987,19 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-4, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 - Stage-12 depends on stages: Stage-1 + Stage-12 depends on stages: 
Stage-1, Stage-13, Stage-2, Stage-3 Stage-7 Stage-9 Stage-10 depends on stages: Stage-9 - Stage-17 depends on stages: Stage-4 , consists of Stage-14, Stage-13, Stage-15 - Stage-14 - Stage-13 - Stage-15 - Stage-16 depends on stages: Stage-15 - Stage-2 depends on stages: Stage-4 - Stage-18 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-18 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3762,6 +5023,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3777,46 +5051,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-11 Conditional Operator @@ -3841,7 +5112,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -3854,7 +5130,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-12 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-7 Map Reduce @@ -3886,44 +5167,69 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-17 - Conditional Operator - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-15 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-16 - Move Operator - 
       files:
-        hdfs directory: true
-#### A masked pattern was here ####
+              Reduce Output Operator
+                key expressions: key (type: string), value (type: string)
+                sort order: ++
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Forward
+          Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (KEY._col0 < 10) (type: boolean)
+            Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              keys: KEY._col0 (type: string), KEY._col1 (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          Filter Operator
+            predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+            Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              keys: KEY._col0 (type: string), KEY._col1 (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Move Operator
@@ -3931,7 +5237,7 @@ STAGE PLANS:
         hdfs directory: false
 #### A masked pattern was here ####
 
-  Stage: Stage-18
+  Stage: Stage-15
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -4035,11 +5341,11 @@ STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9
   Stage-8
-  Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18
+  Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-19, Stage-20
   Stage-0 depends on stages: Stage-6
-  Stage-5 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-0, Stage-18, Stage-2, Stage-3
   Stage-1 depends on stages: Stage-6
-  Stage-12 depends on stages: Stage-1
+  Stage-12 depends on stages: Stage-1, Stage-18, Stage-2, Stage-3
   Stage-7
   Stage-9
   Stage-10 depends on stages: Stage-9
@@ -4048,9 +5354,11 @@ STAGE DEPENDENCIES:
   Stage-13
   Stage-15
   Stage-16 depends on stages: Stage-15
-  Stage-2 depends on stages: Stage-4
   Stage-18 depends on stages: Stage-4
-  Stage-3 depends on stages: Stage-18
+  Stage-19 depends on stages: Stage-4
+  Stage-2 depends on stages: Stage-19
+  Stage-20 depends on stages: Stage-19
+  Stage-3 depends on stages: Stage-20
 
 STAGE PLANS:
   Stage: Stage-4
@@ -4074,6 +5382,19 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
             Filter Operator
               predicate: ((key > 10) and (key < 20)) (type: boolean)
               Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -4089,46 +5410,43 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
               predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean)
               Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-              Reduce Output Operator
-                key expressions: key (type: string), value (type: string)
-                sort order: ++
-                Map-reduce partition columns: key (type: string)
-                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Forward
-          Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-          Filter Operator
-            predicate: (KEY._col0 < 10) (type: boolean)
-            Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              keys: KEY._col0 (type: string), KEY._col1 (type: string)
-              mode: complete
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-          Filter Operator
-            predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
-            Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
-            Group By Operator
-              keys: KEY._col0 (type: string), KEY._col1 (type: string)
-              mode: complete
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
                 table:
                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-11
     Conditional Operator
@@ -4153,7 +5471,12 @@ STAGE PLANS:
                 name: default.src_multi1
 
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-1
     Move Operator
@@ -4166,7 +5489,12 @@ STAGE PLANS:
           name: default.src_multi2
 
   Stage: Stage-12
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 
   Stage: Stage-7
     Map Reduce
@@ -4237,13 +5565,77 @@ STAGE PLANS:
         hdfs directory: true
 #### A masked pattern was here ####
 
+  Stage: Stage-18
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-19
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: key (type: string), value (type: string)
+              sort order: ++
+              Map-reduce partition columns: key (type: string)
+              Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Forward
+          Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (KEY._col0 < 10) (type: boolean)
+            Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              keys: KEY._col0 (type: string), KEY._col1 (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          Filter Operator
+            predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+            Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              keys: KEY._col0 (type: string), KEY._col1 (type: string)
+              mode: complete
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
   Stage: Stage-2
     Move Operator
       files:
         hdfs directory: false
 #### A masked pattern was here ####
 
-  Stage: Stage-18
+  Stage: Stage-20
     Map Reduce
       Map Operator Tree:
           TableScan
diff --git ql/src/test/results/clientpositive/multi_insert_union_src.q.out ql/src/test/results/clientpositive/multi_insert_union_src.q.out
index 1ff1db5c12..7e16f9ca4b 100644
--- ql/src/test/results/clientpositive/multi_insert_union_src.q.out
+++ ql/src/test/results/clientpositive/multi_insert_union_src.q.out
@@ -131,6 +131,26 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: struct), _col1 (type: struct)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -143,7 +163,12 @@ STAGE PLANS:
           name: default.src_multi1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
 
   Stage: Stage-4
     Map Reduce
@@ -167,6 +192,26 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: struct), _col1 (type: struct)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-1
     Move Operator
@@ -179,7 +224,12 @@ STAGE PLANS:
           name: default.src_multi2
 
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 
 PREHOOK: query: from (select * from src1 where key < 10 union all select * from src2 where key > 100) s
 insert overwrite table src_multi1 select key, value where key < 150 order by key
diff --git ql/src/test/results/clientpositive/multi_insert_with_join2.q.out ql/src/test/results/clientpositive/multi_insert_with_join2.q.out
index 225f2c301c..37cb57244e 100644
--- ql/src/test/results/clientpositive/multi_insert_with_join2.q.out
+++ ql/src/test/results/clientpositive/multi_insert_with_join2.q.out
@@ -66,7 +66,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -118,6 +119,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -130,7 +146,34 @@ STAGE PLANS:
           name: default.join_result_1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain
@@ -148,7 +191,8 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -200,6 +244,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -212,7 +271,34 @@ STAGE PLANS:
           name: default.join_result_3
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_3
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: explain
 FROM T_A a LEFT JOIN T_B b ON a.id = b.id
@@ -235,9 +321,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-1, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-0, Stage-4, Stage-6
   Stage-0 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -285,6 +373,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
               predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
               Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -300,6 +403,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -312,7 +430,42 @@ STAGE PLANS:
           name: default.join_result_3
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_3
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_1
 
   Stage: Stage-0
     Move Operator
@@ -324,8 +477,27 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.join_result_1
 
-  Stage: Stage-4
-    Stats-Aggr Operator
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: explain
 FROM T_A a LEFT JOIN T_B b ON a.id = b.id
@@ -348,9 +520,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -398,6 +572,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
              predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -413,6 +602,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -425,7 +629,42 @@ STAGE PLANS:
           name: default.join_result_1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_3
 
   Stage: Stage-1
     Move Operator
@@ -437,8 +676,27 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.join_result_3
 
-  Stage: Stage-4
-    Stats-Aggr Operator
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: explain
 FROM T_A a JOIN T_B b ON a.id = b.id
@@ -461,9 +719,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -517,6 +777,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
              predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -532,6 +807,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -544,7 +834,42 @@ STAGE PLANS:
           name: default.join_result_1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_3
 
   Stage: Stage-1
     Move Operator
@@ -556,8 +881,27 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.join_result_3
 
-  Stage: Stage-4
-    Stats-Aggr Operator
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: explain
 FROM T_A a JOIN T_B b ON a.id = b.id
@@ -580,9 +924,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -636,6 +982,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
              predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -651,6 +1012,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -663,7 +1039,42 @@ STAGE PLANS:
           name: default.join_result_1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_3
 
   Stage: Stage-1
     Move Operator
@@ -675,8 +1086,27 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.join_result_3
 
-  Stage: Stage-4
-    Stats-Aggr Operator
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: explain
 FROM T_A a JOIN T_B b ON a.id = b.id
@@ -699,9 +1129,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -767,6 +1199,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
              predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean)
              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -782,6 +1229,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -794,7 +1256,42 @@ STAGE PLANS:
           name: default.join_result_1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_3
 
   Stage: Stage-1
     Move Operator
@@ -806,8 +1303,27 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.join_result_3
 
-  Stage: Stage-4
-    Stats-Aggr Operator
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: explain
 FROM T_A a JOIN T_B b ON a.id = b.id
@@ -830,9 +1346,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -898,6 +1416,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Filter Operator
              predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean)
              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -913,6 +1446,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.join_result_3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+              outputColumnNames: ida, vala, idb, valb
+              Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(ida, 'hll'), compute_stats(vala, 'hll'), compute_stats(idb, 'hll'), compute_stats(valb, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -925,7 +1473,42 @@ STAGE PLANS:
           name: default.join_result_1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: ida, vala, idb, valb
+          Column Types: string, string, string, string
+          Table: default.join_result_3
 
   Stage: Stage-1
     Move Operator
@@ -937,6 +1520,25 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.join_result_3
 
-  Stage: Stage-4
-    Stats-Aggr Operator
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
diff --git ql/src/test/results/clientpositive/multigroupby_singlemr.q.out ql/src/test/results/clientpositive/multigroupby_singlemr.q.out
index 7af8c4356d..a4587e1982 100644
--- ql/src/test/results/clientpositive/multigroupby_singlemr.q.out
+++ ql/src/test/results/clientpositive/multigroupby_singlemr.q.out
@@ -51,10 +51,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -114,6 +116,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int)
+              outputColumnNames: d1, d2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -126,13 +143,48 @@ STAGE PLANS:
           name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2
+          Column Types: int, int
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
          TableScan
            Reduce Output Operator
+             sort order: 
+             Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+             value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3
+          Column Types: int, int, int
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: _col0 (type: int), _col1 (type: int)
              sort order: ++
              Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
@@ -157,6 +209,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+              outputColumnNames: d1, d2, d3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -168,8 +235,27 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.dest2
 
-  Stage: Stage-5
-    Stats-Aggr Operator
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: EXPLAIN
 FROM TBL
@@ -184,10 +270,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -247,6 +335,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int)
+              outputColumnNames: d1, d2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -259,13 +362,48 @@ STAGE PLANS:
           name: default.dest1
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2
+          Column Types: int, int
+          Table: default.dest1
 
   Stage: Stage-4
     Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
+             sort order: 
+             Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+             value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3
+          Column Types: int, int, int
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: _col0 (type: int), _col1 (type: int)
              sort order: ++
              Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
@@ -290,6 +428,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+              outputColumnNames: d1, d2, d3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -301,8 +454,27 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.dest2
 
-  Stage: Stage-5
-    Stats-Aggr Operator
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: EXPLAIN
 FROM TBL
@@ -317,10 +489,12 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1, Stage-4, Stage-7
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-7 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-2
@@ -380,6 +554,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest3
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+              outputColumnNames: d1, d2, d3, d4
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -392,13 +581,48 @@ STAGE PLANS:
           name: default.dest3
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3, d4
+          Column Types: int, int, int, int
+          Table: default.dest3
 
   Stage: Stage-4
     Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
+             sort order: 
+             Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+             value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3
+          Column Types: int, int, int
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: _col0 (type: int), _col1 (type: int)
              sort order: ++
              Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
@@ -423,6 +647,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+              outputColumnNames: d1, d2, d3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-1
     Move Operator
@@ -434,8 +673,27 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.dest2
 
-  Stage: Stage-5
-    Stats-Aggr Operator
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: EXPLAIN
 FROM TBL
@@ -450,9 +708,11 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-0, Stage-4, Stage-6
+  Stage-4 depends on stages: Stage-2
+  Stage-5 depends on stages: Stage-1, Stage-4, Stage-6
   Stage-1 depends on stages: Stage-2
-  Stage-4 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -492,6 +752,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest3
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+              outputColumnNames: d1, d2, d3, d4
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
             Group By Operator
               aggregations: count(VALUE._col0)
              keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int)
@@ -510,6 +785,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest4
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+              outputColumnNames: d1, d2, d3, d4
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -522,7 +812,42 @@ STAGE PLANS:
           name: default.dest3
 
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3, d4
+          Column Types: int, int, int, int
+          Table: default.dest3
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3, d4
+          Column Types: int, int, int, int
+          Table: default.dest4
 
   Stage: Stage-1
     Move Operator
@@ -534,8 +859,27 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.dest4
 
-  Stage: Stage-4
-    Stats-Aggr Operator
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: EXPLAIN
 FROM TBL
@@ -552,13 +896,16 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-0 depends on stages: Stage-3
-  Stage-4 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-0, Stage-5, Stage-8, Stage-11
   Stage-5 depends on stages: Stage-3
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
-  Stage-7 depends on stages: Stage-3
-  Stage-2 depends on stages: Stage-7
-  Stage-8 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-1, Stage-5, Stage-8, Stage-11
+  Stage-10 depends on stages: Stage-2, Stage-5, Stage-8, Stage-11
+  Stage-6 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-6
+  Stage-8 depends on stages: Stage-6
+  Stage-9 depends on stages: Stage-3
+  Stage-2 depends on stages: Stage-9
+  Stage-11 depends on stages: Stage-9
 
 STAGE PLANS:
   Stage: Stage-3
@@ -634,6 +981,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest3
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+              outputColumnNames: d1, d2, d3, d4
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll')
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -646,13 +1008,56 @@ STAGE PLANS:
           name: default.dest3
 
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3, d4
+          Column Types: int, int, int, int
+          Table: default.dest3
 
   Stage: Stage-5
     Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
+             sort order: 
+             Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE
+             value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-7
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3
+          Column Types: int, int, int
+          Table: default.dest2
+
+  Stage: Stage-10
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2
+          Column Types: int, int
+          Table: default.dest1
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: _col0 (type: int), _col1 (type: int)
              sort order: ++
              Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
@@ -677,6 +1082,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest2
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+              outputColumnNames: d1, d2, d3
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -688,10 +1108,29 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 - Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -720,6 +1159,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -731,6 +1185,25 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 - Stats-Aggr Operator + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git 
ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out index 560708981d..229a7874ab 100644 --- ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -1090,7 +1090,8 @@ STAGE PLANS: name: default.srcx Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: explain analyze table src compute statistics PREHOOK: type: QUERY @@ -1109,7 +1110,8 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: explain select * from src join src src2 on src.key=src2.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out index d01461b51f..490853c7df 100644 --- ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out +++ ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -36,7 +37,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -58,6 +58,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -70,7 +85,34 @@ STAGE PLANS: name: default.insert Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY @@ -102,7 +144,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -121,7 +164,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -143,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -155,7 +212,34 @@ STAGE PLANS: name: default.insert Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY @@ -196,7 +280,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -215,7 +300,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -237,6 +321,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + 
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(as, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -249,7 +348,34 @@ STAGE PLANS: name: default.insert Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE `insert` SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/notable_alias1.q.out ql/src/test/results/clientpositive/notable_alias1.q.out index 677545d8d2..03f2649fc4 100644 --- ql/src/test/results/clientpositive/notable_alias1.q.out +++ ql/src/test/results/clientpositive/notable_alias1.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -60,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 'hll'), compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -72,7 +88,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort 
order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', key, count(1) WHERE src.key < 100 group by key diff --git ql/src/test/results/clientpositive/notable_alias2.q.out ql/src/test/results/clientpositive/notable_alias2.q.out index 66d0b2a92a..7186513b89 100644 --- ql/src/test/results/clientpositive/notable_alias2.q.out +++ ql/src/test/results/clientpositive/notable_alias2.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -60,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 'hll'), compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -72,7 +88,34 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM 
src INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, count(1) WHERE key < 100 group by src.key diff --git ql/src/test/results/clientpositive/nullformatCTAS.q.out ql/src/test/results/clientpositive/nullformatCTAS.q.out index cda09658ce..723a4c9ac1 100644 --- ql/src/test/results/clientpositive/nullformatCTAS.q.out +++ ql/src/test/results/clientpositive/nullformatCTAS.q.out @@ -96,7 +96,8 @@ STAGE PLANS: name: default.null_tab3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/optimize_filter_literal.q.out ql/src/test/results/clientpositive/optimize_filter_literal.q.out index 32e2bf6415..ad7e0a418d 100644 --- ql/src/test/results/clientpositive/optimize_filter_literal.q.out +++ ql/src/test/results/clientpositive/optimize_filter_literal.q.out @@ -90,11 +90,15 @@ PREHOOK: query: analyze table tab_part partition (ds='2008-04-08') compute stati PREHOOK: type: QUERY PREHOOK: Input: default@tab_part PREHOOK: Input: default@tab_part@ds=2008-04-08 +PREHOOK: Output: default@tab_part +PREHOOK: Output: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: analyze table tab_part partition (ds='2008-04-08') compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_part POSTHOOK: Input: default@tab_part@ds=2008-04-08 +POSTHOOK: Output: default@tab_part +POSTHOOK: Output: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS ORCFILE PREHOOK: type: CREATETABLE @@ -122,11 +126,15 @@ PREHOOK: query: analyze table tab partition (ds='2008-04-08') compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@tab PREHOOK: Input: default@tab@ds=2008-04-08 +PREHOOK: Output: default@tab +PREHOOK: Output: default@tab@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: analyze table tab partition (ds='2008-04-08') compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@tab POSTHOOK: Input: default@tab@ds=2008-04-08 +POSTHOOK: Output: default@tab +POSTHOOK: Output: default@tab@ds=2008-04-08 #### A masked pattern was here #### Warning: Shuffle Join JOIN[12][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from diff --git ql/src/test/results/clientpositive/orc_createas1.q.out ql/src/test/results/clientpositive/orc_createas1.q.out index 506f39d59f..ecfad35286 100644 --- ql/src/test/results/clientpositive/orc_createas1.q.out +++ ql/src/test/results/clientpositive/orc_createas1.q.out @@ -108,7 +108,8 @@ STAGE PLANS: name: default.orc_createas1b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Merge File Operator @@ -274,7 +275,8 @@ STAGE PLANS: name: default.orc_createas1c Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge1.q.out ql/src/test/results/clientpositive/orc_merge1.q.out index a83e85bec4..cad6c00a8e 100644 --- ql/src/test/results/clientpositive/orc_merge1.q.out +++ ql/src/test/results/clientpositive/orc_merge1.q.out @@ -74,6 +74,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: 
string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,7 +123,12 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -149,6 +188,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: 
string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -173,7 +246,12 @@ STAGE PLANS: name: default.orcfile_merge1b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-3 Map Reduce @@ -263,6 +341,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -287,7 +399,12 @@ STAGE PLANS: name: default.orcfile_merge1c Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge10.q.out ql/src/test/results/clientpositive/orc_merge10.q.out index 607aaeb6ae..0b8e2ddfba 100644 --- ql/src/test/results/clientpositive/orc_merge10.q.out +++ ql/src/test/results/clientpositive/orc_merge10.q.out @@ -74,6 +74,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 
(type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,7 +123,12 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -149,6 +188,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: 
struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -173,7 +246,12 @@ STAGE PLANS: name: default.orcfile_merge1b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-3 Map Reduce @@ -263,6 +341,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -287,7 +399,12 @@ STAGE PLANS: name: default.orcfile_merge1c Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-3 Merge File Operator @@ -418,7 +535,8 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: ALTER TABLE orcfile_merge1 PARTITION (ds='1', part='0') CONCATENATE PREHOOK: type: ALTER_PARTITION_MERGE diff --git ql/src/test/results/clientpositive/orc_merge2.q.out ql/src/test/results/clientpositive/orc_merge2.q.out index d4c474f9fe..70788fde9f 100644 --- 
ql/src/test/results/clientpositive/orc_merge2.q.out +++ ql/src/test/results/clientpositive/orc_merge2.q.out @@ -53,6 +53,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -78,7 +112,12 @@ STAGE PLANS: name: default.orcfile_merge2a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge3.q.out ql/src/test/results/clientpositive/orc_merge3.q.out index 7bf12c6c28..597b00078e 100644 --- ql/src/test/results/clientpositive/orc_merge3.q.out +++ ql/src/test/results/clientpositive/orc_merge3.q.out @@ -83,6 +83,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 
(type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,7 +130,12 @@ STAGE PLANS: name: default.orcfile_merge3b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge4.q.out ql/src/test/results/clientpositive/orc_merge4.q.out index 828f204157..49147b5dc6 100644 --- ql/src/test/results/clientpositive/orc_merge4.q.out +++ ql/src/test/results/clientpositive/orc_merge4.q.out @@ -101,6 +101,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -122,7 +148,12 @@ STAGE PLANS: name: default.orcfile_merge3b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/orc_merge5.q.out ql/src/test/results/clientpositive/orc_merge5.q.out index e8451869f6..1ac9e0ccbc 100644 --- ql/src/test/results/clientpositive/orc_merge5.q.out +++ ql/src/test/results/clientpositive/orc_merge5.q.out @@ -54,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data 
size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -66,7 +92,12 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY @@ -139,6 +170,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
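Every plan change in these .q.out files follows the same autogather shape: a Select Operator that re-projects the inserted columns under their target names, a hash-mode Group By over compute_stats(col, 'hll') (keyed by the partition columns when the insert is dynamic-partitioned), a mergepartial Group By on the reduce side, and a Stats Work stage recording both Basic Stats Work and a Column Stats Desc. A minimal sketch that should reproduce the pattern — the table t and columns k, v are illustrative, not taken from these tests; src is the standard test table used throughout:

    SET hive.stats.column.autogather=true;   -- explicit here for clarity; this patch makes it the default
    CREATE TABLE t (k INT, v STRING);
    EXPLAIN INSERT OVERWRITE TABLE t SELECT key, value FROM src;
    -- Expected tail of the plan, per the diffs above:
    --   Select Operator     projects k, v for stats collection
    --   Group By Operator   aggregations: compute_stats(k, 'hll'), compute_stats(v, 'hll')   mode: hash
    --   Group By Operator   aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)   mode: mergepartial
    --   Stats Work          Basic Stats Work + Column Stats Desc (Columns: k, v)
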
Stage: Stage-7 Conditional Operator @@ -160,7 +217,12 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b Stage: Stage-3 Merge File Operator @@ -273,7 +335,8 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5b concatenate PREHOOK: type: ALTER_TABLE_MERGE diff --git ql/src/test/results/clientpositive/orc_merge6.q.out ql/src/test/results/clientpositive/orc_merge6.q.out index 5ece361bbc..d0ccbb17e8 100644 --- ql/src/test/results/clientpositive/orc_merge6.q.out +++ ql/src/test/results/clientpositive/orc_merge6.q.out @@ -54,6 +54,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,7 +103,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, 
decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY @@ -184,6 +223,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -208,7 +281,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a Stage: Stage-3 Merge File Operator @@ -408,7 +486,8 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5a partition(year="2000",hour=24) concatenate PREHOOK: type: ALTER_PARTITION_MERGE diff --git ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out index a83e85bec4..cad6c00a8e 100644 --- ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out +++ 
ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out @@ -74,6 +74,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,7 +123,12 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -149,6 +188,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -173,7 +246,12 @@ STAGE PLANS: name: default.orcfile_merge1b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-3 Map Reduce @@ -263,6 +341,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -287,7 +399,12 @@ STAGE PLANS: name: default.orcfile_merge1c Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/orc_merge_incompat1.q.out ql/src/test/results/clientpositive/orc_merge_incompat1.q.out index 
fcf1c68f83..5c530b8652 100644 --- ql/src/test/results/clientpositive/orc_merge_incompat1.q.out +++ ql/src/test/results/clientpositive/orc_merge_incompat1.q.out @@ -53,6 +53,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -65,7 +91,12 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/orc_merge_incompat2.q.out ql/src/test/results/clientpositive/orc_merge_incompat2.q.out index a27041fd9b..8eece2b869 100644 --- ql/src/test/results/clientpositive/orc_merge_incompat2.q.out +++ ql/src/test/results/clientpositive/orc_merge_incompat2.q.out @@ -50,6 +50,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -64,7 +98,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY @@ -277,7 +316,8 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate PREHOOK: type: ALTER_PARTITION_MERGE diff --git ql/src/test/results/clientpositive/parallel.q.out ql/src/test/results/clientpositive/parallel.q.out index 459105e09a..12ad5ee874 100644 --- ql/src/test/results/clientpositive/parallel.q.out +++ ql/src/test/results/clientpositive/parallel.q.out @@ -28,9 +28,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -91,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -104,6 +121,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -116,7 +148,42 @@ STAGE PLANS: name: default.src_a Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b Stage: Stage-1 Move Operator @@ -128,8 +195,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select key, value from src group by key, value) s insert overwrite table src_a select s.key, s.value group by s.key, s.value diff --git ql/src/test/results/clientpositive/parallel_colstats.q.out 
ql/src/test/results/clientpositive/parallel_colstats.q.out index 83fc14a72f..12ad5ee874 100644 --- ql/src/test/results/clientpositive/parallel_colstats.q.out +++ ql/src/test/results/clientpositive/parallel_colstats.q.out @@ -28,12 +28,10 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 - Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 - Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7 Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 Stage-1 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-1 Stage-7 depends on stages: Stage-3 STAGE PLANS: @@ -150,22 +148,13 @@ STAGE PLANS: name: default.src_a Stage: Stage-4 - Stats-Aggr Operator - - Stage: Stage-8 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key, value Column Types: string, string Table: default.src_a - Stage: Stage-9 - Column Stats Work - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.src_b - Stage: Stage-5 Map Reduce Map Operator Tree: @@ -188,6 +177,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b + Stage: Stage-1 Move Operator tables: @@ -198,9 +195,6 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b - Stage: Stage-6 - Stats-Aggr Operator - Stage: Stage-7 Map Reduce Map Operator Tree: diff --git ql/src/test/results/clientpositive/parallel_join1.q.out ql/src/test/results/clientpositive/parallel_join1.q.out index 8843661176..3aa369c0ac 100644 --- ql/src/test/results/clientpositive/parallel_join1.q.out +++ ql/src/test/results/clientpositive/parallel_join1.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -87,7 +103,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data 
size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/parallel_orderby.q.out ql/src/test/results/clientpositive/parallel_orderby.q.out index 8249a7baad..9a30336194 100644 --- ql/src/test/results/clientpositive/parallel_orderby.q.out +++ ql/src/test/results/clientpositive/parallel_orderby.q.out @@ -79,7 +79,8 @@ STAGE PLANS: name: default.total_ordered Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table total_ordered as select * from src5 order by key, value PREHOOK: type: CREATETABLE_AS_SELECT diff --git ql/src/test/results/clientpositive/parquet_analyze.q.out ql/src/test/results/clientpositive/parquet_analyze.q.out index 76a8d80788..54131f579a 100644 --- ql/src/test/results/clientpositive/parquet_analyze.q.out +++ ql/src/test/results/clientpositive/parquet_analyze.q.out @@ -90,7 +90,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 700 @@ -138,7 +138,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 5952 diff --git ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out index 37d56ca882..93aeea1278 100644 --- ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out +++ ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out @@ -56,7 +56,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone UTC @@ -125,7 +125,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone PST @@ -194,7 +194,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 rawDataSize 1 @@ -262,7 +262,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone CST @@ -314,6 +314,37 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@timestamps #### A masked pattern was here #### 2016-01-01 01:01:01 +<<<<<<< HEAD +2017-01-01 01:01:01 +PREHOOK: query: select * from timestamps order by ts +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: select * from timestamps order by ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +2016-01-01 01:01:01 +2017-01-01 01:01:01 +PREHOOK: query: select * from timestamps where ts = cast('2016-01-01 01:01:01' as timestamp) +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: select * from timestamps where ts = cast('2016-01-01 01:01:01' as timestamp) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +2016-01-01 01:01:01 +PREHOOK: query: select year(ts), day(ts), hour(ts), ts from timestamps +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +#### A masked pattern was here #### +POSTHOOK: query: select year(ts), day(ts), hour(ts), ts from timestamps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +#### A masked pattern was here #### +2016 1 1 2016-01-01 01:01:01 +2017 1 1 2017-01-01 01:01:01 PREHOOK: query: describe formatted timestamps PREHOOK: type: DESCTABLE PREHOOK: Input: default@timestamps @@ -331,9 +362,120 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} + numFiles 2 + numRows 2 + parquet.mr.int96.write.zone PST + rawDataSize 2 + totalSize 544 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table timestamps +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@timestamps +PREHOOK: Output: default@timestamps +POSTHOOK: query: drop table timestamps +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@timestamps +POSTHOOK: Output: default@timestamps +PREHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@timestamps +POSTHOOK: query: create table timestamps (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='PST') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamps +PREHOOK: query: insert into table timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@timestamps +POSTHOOK: query: insert into table 
timestamps select cast('2016-01-01 01:01:01' as timestamp) limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@timestamps +POSTHOOK: Lineage: timestamps.ts EXPRESSION [] +PREHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') +PREHOOK: type: QUERY +PREHOOK: Output: default@timestamps +POSTHOOK: query: insert into table timestamps values('2017-01-01 01:01:01') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@timestamps +POSTHOOK: Lineage: timestamps.ts EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table timestamps2 (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+2') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@timestamps2 +POSTHOOK: query: create table timestamps2 (ts timestamp) stored as parquet tblproperties('parquet.mr.int96.write.zone'='GMT+2') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@timestamps2 +PREHOOK: query: insert into table timestamps2 select cast('2016-01-01 01:01:01' as timestamp) limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@timestamps2 +POSTHOOK: query: insert into table timestamps2 select cast('2016-01-01 01:01:01' as timestamp) limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@timestamps2 +POSTHOOK: Lineage: timestamps2.ts EXPRESSION [] +PREHOOK: query: insert into table timestamps2 values('2017-01-01 01:01:01') +PREHOOK: type: QUERY +PREHOOK: Output: default@timestamps2 +POSTHOOK: query: insert into table timestamps2 values('2017-01-01 01:01:01') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@timestamps2 +POSTHOOK: Lineage: timestamps2.ts EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from timestamps a inner join timestamps2 b on a.ts = b.ts +PREHOOK: type: QUERY +PREHOOK: Input: default@timestamps +PREHOOK: Input: default@timestamps2 +#### A masked pattern was here #### +POSTHOOK: query: select * from timestamps a inner join timestamps2 b on a.ts = b.ts +POSTHOOK: type: QUERY +POSTHOOK: Input: default@timestamps +POSTHOOK: Input: default@timestamps2 +#### A masked pattern was here #### +2016-01-01 01:01:01 2016-01-01 01:01:01 +2017-01-01 01:01:01 2017-01-01 01:01:01 +======= +>>>>>>> asf/master +PREHOOK: query: describe formatted timestamps +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@timestamps +POSTHOOK: query: describe formatted timestamps +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@timestamps +# col_name data_type comment + +ts timestamp + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +<<<<<<< HEAD + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} + numFiles 2 + numRows 2 +======= COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} numFiles 1 numRows 1 +>>>>>>> asf/master parquet.mr.int96.write.zone PST rawDataSize 1 totalSize 272 @@ -357,6 +499,52 @@ POSTHOOK: query: drop table timestamps POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@timestamps POSTHOOK: Output: default@timestamps +<<<<<<< HEAD +PREHOOK: query: describe formatted timestamps2 +PREHOOK: type: DESCTABLE +PREHOOK: 
Input: default@timestamps2 +POSTHOOK: query: describe formatted timestamps2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@timestamps2 +# col_name data_type comment + +ts timestamp + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} + numFiles 2 + numRows 2 + parquet.mr.int96.write.zone GMT+2 + rawDataSize 2 + totalSize 544 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table timestamps2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@timestamps2 +PREHOOK: Output: default@timestamps2 +POSTHOOK: query: drop table timestamps2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@timestamps2 +POSTHOOK: Output: default@timestamps2 +======= +>>>>>>> asf/master PREHOOK: query: create table timestamps (ts timestamp) stored as parquet PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git ql/src/test/results/clientpositive/partial_column_stats.q.out ql/src/test/results/clientpositive/partial_column_stats.q.out index e8b1a99fe2..04a00cdace 100644 --- ql/src/test/results/clientpositive/partial_column_stats.q.out +++ ql/src/test/results/clientpositive/partial_column_stats.q.out @@ -49,7 +49,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: key, value Column Types: int, string @@ -58,10 +59,12 @@ STAGE PLANS: PREHOOK: query: analyze table t1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 #### A masked pattern was here #### POSTHOOK: query: analyze table t1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 #### A masked pattern was here #### PREHOOK: query: desc formatted t1 value PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/partition_coltype_literals.q.out ql/src/test/results/clientpositive/partition_coltype_literals.q.out index 51eda726d6..6759983c27 100644 --- ql/src/test/results/clientpositive/partition_coltype_literals.q.out +++ ql/src/test/results/clientpositive/partition_coltype_literals.q.out @@ -290,11 +290,15 @@ PREHOOK: query: analyze table partcoltypenum partition (tint=110Y, sint=22000S, PREHOOK: type: QUERY PREHOOK: Input: default@partcoltypenum PREHOOK: Input: default@partcoltypenum@tint=110/sint=22000/bint=330000000000 +PREHOOK: Output: default@partcoltypenum +PREHOOK: Output: default@partcoltypenum@tint=110/sint=22000/bint=330000000000 #### A masked pattern was here #### POSTHOOK: query: analyze table partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@partcoltypenum POSTHOOK: Input: default@partcoltypenum@tint=110/sint=22000/bint=330000000000 +POSTHOOK: Output: default@partcoltypenum +POSTHOOK: Output: default@partcoltypenum@tint=110/sint=22000/bint=330000000000 #### A masked pattern was here #### PREHOOK: 
query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) key PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out index 5d0d170245..4eb76412e7 100644 --- ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -138,7 +138,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -280,7 +280,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -327,7 +327,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -374,7 +374,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -554,7 +554,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -601,7 +601,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -745,7 +745,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -792,7 +792,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -938,7 +938,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -985,7 +985,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1032,7 +1032,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1189,7 +1189,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1236,7 +1236,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1283,7 +1283,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1447,7 +1447,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1494,7 +1494,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1621,7 +1621,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1668,7 +1668,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1835,7 +1835,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1882,7 +1882,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1929,7 +1929,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2123,7 +2123,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2170,7 +2170,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2326,7 +2326,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2589,7 +2589,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2636,7 +2636,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2888,7 +2888,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2935,7 +2935,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2982,7 +2982,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3029,7 +3029,7 @@ STAGE PLANS: partition values: ds 2000-04-11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3208,7 +3208,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3255,7 +3255,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3302,7 +3302,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3466,17 +3466,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -3522,6 +3518,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: 
key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3558,6 +3570,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3569,7 +3608,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3610,6 +3649,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3649,8 +3717,14 @@ STAGE PLANS: name: default.pcr_t2 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: 
int, string + Table: default.pcr_t2 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -3832,15 +3906,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -3870,188 +3935,83 @@ STAGE PLANS: name: default.pcr_t3 Stage: Stage-9 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + Needs Tagging: 
false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' @@ -4088,17 +4048,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -4127,7 +4083,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4148,6 +4104,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 2 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (key = 3) (type: boolean) @@ -4167,7 +4139,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4188,6 +4160,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true 
MultiFileSpray: false + Select Operator + expressions: 3 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4199,7 +4198,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4240,6 +4239,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -4259,7 +4287,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4279,8 +4307,14 @@ STAGE PLANS: name: default.pcr_t2 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true Stage: Stage-4 Map Reduce @@ -4296,7 +4330,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4326,7 +4360,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4347,7 +4381,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4383,7 +4417,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4413,7 +4447,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4434,7 +4468,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4462,15 +4496,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -4480,7 +4505,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4500,188 +4525,83 @@ STAGE PLANS: name: default.pcr_t3 Stage: Stage-9 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - 
numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 
20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 diff --git ql/src/test/results/clientpositive/pcs.q.out ql/src/test/results/clientpositive/pcs.q.out index af5d11ad44..6b2ad02cb5 100644 --- ql/src/test/results/clientpositive/pcs.q.out +++ ql/src/test/results/clientpositive/pcs.q.out @@ -70,6 +70,10 @@ PREHOOK: Input: default@pcs_t1 PREHOOK: Input: default@pcs_t1@ds=2000-04-08 PREHOOK: Input: 
default@pcs_t1@ds=2000-04-09 PREHOOK: Input: default@pcs_t1@ds=2000-04-10 +PREHOOK: Output: default@pcs_t1 +PREHOOK: Output: default@pcs_t1@ds=2000-04-08 +PREHOOK: Output: default@pcs_t1@ds=2000-04-09 +PREHOOK: Output: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### POSTHOOK: query: analyze table pcs_t1 partition(ds) compute statistics for columns POSTHOOK: type: QUERY @@ -77,6 +81,10 @@ POSTHOOK: Input: default@pcs_t1 POSTHOOK: Input: default@pcs_t1@ds=2000-04-08 POSTHOOK: Input: default@pcs_t1@ds=2000-04-09 POSTHOOK: Input: default@pcs_t1@ds=2000-04-10 +POSTHOOK: Output: default@pcs_t1 +POSTHOOK: Output: default@pcs_t1@ds=2000-04-08 +POSTHOOK: Output: default@pcs_t1@ds=2000-04-09 +POSTHOOK: Output: default@pcs_t1@ds=2000-04-10 #### A masked pattern was here #### PREHOOK: query: explain extended select key, value, ds from pcs_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/perf/query23.q.out ql/src/test/results/clientpositive/perf/query23.q.out index dde707d0ea..5925869cbf 100644 --- ql/src/test/results/clientpositive/perf/query23.q.out +++ ql/src/test/results/clientpositive/perf/query23.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt diff --git ql/src/test/results/clientpositive/pointlookup2.q.out ql/src/test/results/clientpositive/pointlookup2.q.out index 158e88522f..c99a3223e4 100644 --- ql/src/test/results/clientpositive/pointlookup2.q.out +++ ql/src/test/results/clientpositive/pointlookup2.q.out @@ -136,7 +136,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -183,7 +183,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -334,7 +334,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -557,7 +557,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -604,7 +604,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -821,7 +821,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -868,7 +868,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -913,7 +913,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -934,7 +934,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1136,7 +1136,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1183,7 +1183,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1230,7 +1230,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1275,7 +1275,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1296,7 +1296,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value diff --git ql/src/test/results/clientpositive/pointlookup3.q.out ql/src/test/results/clientpositive/pointlookup3.q.out index eb61e17cd9..0057d1df94 100644 --- ql/src/test/results/clientpositive/pointlookup3.q.out +++ ql/src/test/results/clientpositive/pointlookup3.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 
column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -418,7 +418,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -642,7 +642,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -690,7 +690,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -908,7 +908,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -956,7 +956,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1004,7 +1004,7 @@ STAGE PLANS: ds1 2000-04-10 ds2 2001-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/pointlookup4.q.out ql/src/test/results/clientpositive/pointlookup4.q.out index 8ef5551369..3c9cc60903 100644 --- ql/src/test/results/clientpositive/pointlookup4.q.out +++ ql/src/test/results/clientpositive/pointlookup4.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -315,7 +315,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/ppd_constant_expr.q.out ql/src/test/results/clientpositive/ppd_constant_expr.q.out index cbe76549f7..ad57bf13b6 100644 --- ql/src/test/results/clientpositive/ppd_constant_expr.q.out +++ 
ql/src/test/results/clientpositive/ppd_constant_expr.q.out @@ -43,6 +43,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -64,7 +90,12 @@ STAGE PLANS: name: default.ppd_constant_expr Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr Stage: Stage-3 Map Reduce @@ -179,6 +210,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -200,7 +257,12 @@ STAGE PLANS: name: default.ppd_constant_expr Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, 
c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/ppd_multi_insert.q.out ql/src/test/results/clientpositive/ppd_multi_insert.q.out index 7e501c71c0..0e64376862 100644 --- ql/src/test/results/clientpositive/ppd_multi_insert.q.out +++ ql/src/test/results/clientpositive/ppd_multi_insert.q.out @@ -47,11 +47,14 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-6, Stage-8, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-1, Stage-6, Stage-8, Stage-10, Stage-3 + Stage-9 depends on stages: Stage-2, Stage-6, Stage-8, Stage-10, Stage-3 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -105,6 +108,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -120,6 +138,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -135,6 +168,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: 
string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -161,7 +210,50 @@ STAGE PLANS: name: default.mi1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 Stage: Stage-1 Move Operator @@ -173,8 +265,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 - Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -189,8 +300,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 - Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: 
string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Move Operator @@ -1312,11 +1449,14 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-6, Stage-8, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-1, Stage-6, Stage-8, Stage-10, Stage-3 + Stage-9 depends on stages: Stage-2, Stage-6, Stage-8, Stage-10, Stage-3 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -1370,6 +1510,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1385,6 +1540,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1400,6 +1570,22 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -1426,7 +1612,50 @@ STAGE PLANS: name: default.mi1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-9 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 Stage: Stage-1 Move Operator @@ -1438,8 +1667,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 - Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -1454,8 +1702,34 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 - Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output 
Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Move Operator diff --git ql/src/test/results/clientpositive/push_or.q.out ql/src/test/results/clientpositive/push_or.q.out index dacdc40192..913fc6728b 100644 --- ql/src/test/results/clientpositive/push_or.q.out +++ ql/src/test/results/clientpositive/push_or.q.out @@ -73,7 +73,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -120,7 +120,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/quote1.q.out ql/src/test/results/clientpositive/quote1.q.out index f8592c4e00..1992cc8449 100644 --- ql/src/test/results/clientpositive/quote1.q.out +++ ql/src/test/results/clientpositive/quote1.q.out @@ -46,6 +46,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) + outputColumnNames: location, type, table + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(location, 'hll'), compute_stats(type, 'hll') + keys: table (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 
(type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,7 +103,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: location, type + Column Types: int, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/quotedid_stats.q.out ql/src/test/results/clientpositive/quotedid_stats.q.out index 40e23bf773..b85eb26568 100644 --- ql/src/test/results/clientpositive/quotedid_stats.q.out +++ ql/src/test/results/clientpositive/quotedid_stats.q.out @@ -44,10 +44,12 @@ Storage Desc Params: PREHOOK: query: analyze table t4 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@t4 +PREHOOK: Output: default@t4 #### A masked pattern was here #### POSTHOOK: query: analyze table t4 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@t4 +POSTHOOK: Output: default@t4 #### A masked pattern was here #### PREHOOK: query: describe formatted t4 PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/rand_partitionpruner2.q.out ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index 9b29136ba1..0e8ec13893 100644 --- ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -72,6 +72,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -174,6 +190,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [a] /srcpart/ds=2008-04-08/hr=12 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -213,8 +258,14 @@ STAGE PLANS: name: default.tmptable Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/rcfile_default_format.q.out ql/src/test/results/clientpositive/rcfile_default_format.q.out index 97bc8d2c66..d8b9860674 100644 --- ql/src/test/results/clientpositive/rcfile_default_format.q.out +++ ql/src/test/results/clientpositive/rcfile_default_format.q.out @@ -121,7 +121,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 1 numRows 500 rawDataSize 1406 diff --git ql/src/test/results/clientpositive/rcfile_null_value.q.out ql/src/test/results/clientpositive/rcfile_null_value.q.out index 12b714088a..01de2d0767 100644 --- ql/src/test/results/clientpositive/rcfile_null_value.q.out +++ ql/src/test/results/clientpositive/rcfile_null_value.q.out @@ -90,7 +90,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -149,6 +150,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.dest1_rc + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -161,7 +177,34 @@ STAGE PLANS: name: default.dest1_rc Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1_rc + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 
(type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/remove_exprs_stats.q.out ql/src/test/results/clientpositive/remove_exprs_stats.q.out index 4600e71f09..28caeb833d 100644 --- ql/src/test/results/clientpositive/remove_exprs_stats.q.out +++ ql/src/test/results/clientpositive/remove_exprs_stats.q.out @@ -55,10 +55,12 @@ POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc #### A masked pattern was here #### PREHOOK: query: explain select * from loc_orc where locid < 30 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/rename_external_partition_location.q.out ql/src/test/results/clientpositive/rename_external_partition_location.q.out index 9665bce88a..5fb71a49f5 100644 --- ql/src/test/results/clientpositive/rename_external_partition_location.q.out +++ ql/src/test/results/clientpositive/rename_external_partition_location.q.out @@ -69,11 +69,15 @@ PREHOOK: query: ANALYZE TABLE ex_table PARTITION (part='part1') COMPUTE STATISTI PREHOOK: type: QUERY PREHOOK: Input: default@ex_table PREHOOK: Input: default@ex_table@part=part1 +PREHOOK: Output: default@ex_table +PREHOOK: Output: default@ex_table@part=part1 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE ex_table PARTITION (part='part1') COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@ex_table POSTHOOK: Input: default@ex_table@part=part1 +POSTHOOK: Output: default@ex_table +POSTHOOK: Output: default@ex_table@part=part1 #### A masked pattern was here #### PREHOOK: query: DESCRIBE FORMATTED ex_table PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out index d135485991..a62575f0e0 100644 --- ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out +++ ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out @@ -47,10 +47,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable1 POSTHOOK: query: analyze table testtable1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable1 PREHOOK: query: describe formatted 
statsdb1.testtable1 col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 @@ -193,10 +195,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +PREHOOK: Output: statsdb1@testtable1 POSTHOOK: query: analyze table testtable1 compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: statsdb1@testtable1 #### A masked pattern was here #### +POSTHOOK: Output: statsdb1@testtable1 PREHOOK: query: describe formatted statsdb1.testtable1 col1 PREHOOK: type: DESCTABLE PREHOOK: Input: statsdb1@testtable1 diff --git ql/src/test/results/clientpositive/sample1.q.out ql/src/test/results/clientpositive/sample1.q.out index dec9b233b3..6d586585f5 100644 --- ql/src/test/results/clientpositive/sample1.q.out +++ ql/src/test/results/clientpositive/sample1.q.out @@ -74,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(dt, 'hll'), compute_stats(hr, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +143,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -166,8 +211,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git 
ql/src/test/results/clientpositive/sample2.q.out ql/src/test/results/clientpositive/sample2.q.out index f54c57363a..164ce43f79 100644 --- ql/src/test/results/clientpositive/sample2.q.out +++ ql/src/test/results/clientpositive/sample2.q.out @@ -73,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +142,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -165,8 +210,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/sample4.q.out ql/src/test/results/clientpositive/sample4.q.out index 675fda9ef4..ff97dd0d6c 100644 --- ql/src/test/results/clientpositive/sample4.q.out +++ ql/src/test/results/clientpositive/sample4.q.out @@ -73,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: 
struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +142,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -165,8 +210,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/sample5.q.out ql/src/test/results/clientpositive/sample5.q.out index 583784e9b7..6d50ff5588 100644 --- ql/src/test/results/clientpositive/sample5.q.out +++ ql/src/test/results/clientpositive/sample5.q.out @@ -74,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + 
escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -166,8 +211,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/sample6.q.out ql/src/test/results/clientpositive/sample6.q.out index 36e6906785..af8537741a 100644 --- ql/src/test/results/clientpositive/sample6.q.out +++ ql/src/test/results/clientpositive/sample6.q.out @@ -73,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +142,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -165,8 +210,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/sample7.q.out ql/src/test/results/clientpositive/sample7.q.out index f0d9088174..f24f297ebc 100644 --- ql/src/test/results/clientpositive/sample7.q.out +++ 
ql/src/test/results/clientpositive/sample7.q.out @@ -74,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -166,8 +211,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/skewjoin.q.out ql/src/test/results/clientpositive/skewjoin.q.out index cd7d6fa053..2f214aaf3f 100644 --- ql/src/test/results/clientpositive/skewjoin.q.out +++ ql/src/test/results/clientpositive/skewjoin.q.out @@ -80,11 +80,12 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-0 - Stage-6 - Stage-4 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-4 - Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-1 , consists of Stage-7, Stage-0, Stage-3 + Stage-7 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -143,11 +144,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, 
value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Conditional Operator - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: 1 @@ -161,7 +177,7 @@ STAGE PLANS: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -184,6 +200,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -198,7 +229,34 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/skewjoin_noskew.q.out ql/src/test/results/clientpositive/skewjoin_noskew.q.out index 243a840406..d39acaf10a 100644 --- ql/src/test/results/clientpositive/skewjoin_noskew.q.out +++ ql/src/test/results/clientpositive/skewjoin_noskew.q.out @@ -146,7 +146,8 @@ STAGE PLANS: name: default.noskew Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table noskew as select a.* from src a join src b on a.key=b.key order by a.key limit 30 PREHOOK: type: CREATETABLE_AS_SELECT diff --git 
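
In skewjoin.q.out the stage graph is renumbered because both write paths now gather partial column statistics: the main join job and the conditional skew follow-up job (now Stage-5) each end in a hash-mode compute_stats Group By, and a new Stage-3 MapReduce merges the partials, which is why the Stats Work stage (Stage-2) now depends on both Stage-0 and Stage-3. A sketch of reproducing this plan with the query shown in the diff, assuming the skew-join optimization that the skewjoin.q suite exercises is enabled:

    SET hive.optimize.skewjoin=true;
    SET hive.stats.column.autogather=true;
    EXPLAIN
    FROM src src1 JOIN src src2 ON (src1.key = src2.key)
    INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value;
    -- both the join job and the conditional skew job feed one
    -- stats-merging MapReduce before the Stats Work stage
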
ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out index 09aaaf684a..a5c0df9aa7 100644 --- ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out +++ ql/src/test/results/clientpositive/skewjoin_onesideskew.q.out @@ -186,7 +186,8 @@ STAGE PLANS: name: default.result Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: CREATE TABLE result AS SELECT a.* FROM skewtable a JOIN nonskewtable b ON a.key=b.key PREHOOK: type: CREATETABLE_AS_SELECT diff --git ql/src/test/results/clientpositive/smb_mapjoin9.q.out ql/src/test/results/clientpositive/smb_mapjoin9.q.out index 6916ce8f0a..433344f302 100644 --- ql/src/test/results/clientpositive/smb_mapjoin9.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin9.q.out @@ -280,7 +280,8 @@ STAGE PLANS: name: default.smb_mapjoin9_results Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/smb_mapjoin_11.q.out ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index b53e6704cc..ef48acb653 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -49,7 +49,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -93,7 +94,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -172,6 +173,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -202,8 +229,86 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition 
+ base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY @@ -1827,7 +1932,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1936,7 +2041,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/smb_mapjoin_12.q.out ql/src/test/results/clientpositive/smb_mapjoin_12.q.out index 9af4683451..c083eae14e 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_12.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_12.q.out @@ -129,7 +129,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -204,8 +204,14 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: 
int, string + Table: default.test_table3 + Is Table Level Stats: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY @@ -298,7 +304,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -409,8 +415,14 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' diff --git ql/src/test/results/clientpositive/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/smb_mapjoin_13.q.out index 49ff6355b3..3bc00c8973 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_13.q.out @@ -112,7 +112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,7 +135,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -292,7 +292,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -315,7 +315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git ql/src/test/results/clientpositive/smb_mapjoin_20.q.out ql/src/test/results/clientpositive/smb_mapjoin_20.q.out index 48e4d05a66..d42026edb9 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_20.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_20.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -73,6 +74,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -87,7 +104,41 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' @@ -202,7 +253,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' @@ -1319,6 +1375,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1350,6 +1407,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), 
compute_stats(value1, 16), compute_stats(value2, 16) + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1364,5 +1437,39 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2' (type: string) + sort order: + + Map-reduce partition columns: '2' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/smb_mapjoin_21.q.out ql/src/test/results/clientpositive/smb_mapjoin_21.q.out index c0fdfd38d2..1fb0c8ff91 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_21.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_21.q.out @@ -76,7 +76,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -107,7 +112,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -139,6 +145,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -153,7 +175,41 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -184,7 +240,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -215,6 +272,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -229,7 +302,41 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), 
_col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -260,7 +367,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -292,6 +400,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -306,7 +430,41 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -337,7 +495,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -369,6 +528,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -383,7 +558,41 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -414,7 +623,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -445,6 +655,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -459,5 +685,39 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe diff --git ql/src/test/results/clientpositive/smb_mapjoin_22.q.out ql/src/test/results/clientpositive/smb_mapjoin_22.q.out index 36e879236f..5958232afa 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_22.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_22.q.out @@ -72,7 +72,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 @@ -212,7 +217,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 diff --git ql/src/test/results/clientpositive/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/smb_mapjoin_7.q.out index 82f5804eea..b71c5b87c1 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_7.q.out @@ -648,6 +648,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), 
compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -669,7 +690,12 @@ STAGE PLANS: name: default.smb_join_results Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out index 40c0ec5b59..5754a74478 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out @@ -101,7 +101,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 1 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out index 0740df3079..5dc2426fe4 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out @@ -75,7 +75,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -184,7 +184,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out index fc5066c0f7..28501ab1d0 100644 --- ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out +++ ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out @@ -111,7 +111,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -158,7 +158,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -259,7 +259,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -308,7 +308,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out index 0d5ba01960..f0cf74ca81 100644 --- ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out +++ ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out @@ -89,7 +89,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -241,7 +241,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -290,7 +290,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 diff --git ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 02d1605f40..118a48eb57 100644 --- ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -97,26 +97,32 @@ POSTHOOK: Output: default@loc PREHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid PREHOOK: type: QUERY PREHOOK: Input: default@emp +PREHOOK: Output: default@emp #### A masked pattern was here #### POSTHOOK: query: analyze table emp compute statistics for columns lastname,deptid,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp +POSTHOOK: Output: default@emp #### A masked pattern was here #### PREHOOK: query: analyze table dept compute statistics for columns deptname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@dept +PREHOOK: Output: default@dept #### A masked pattern was here #### POSTHOOK: query: analyze table dept compute statistics for columns deptname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@dept +POSTHOOK: Output: default@dept #### A masked pattern was here #### PREHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc +PREHOOK: Output: default@loc #### A masked pattern was here #### POSTHOOK: query: analyze table loc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc +POSTHOOK: Output: default@loc #### A masked pattern was here #### PREHOOK: query: explain select * from emp e join dept d on (e.deptid = d.deptid) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/auto_join1.q.out ql/src/test/results/clientpositive/spark/auto_join1.q.out index d9cd7700cc..3f810d11cb 
100644 --- ql/src/test/results/clientpositive/spark/auto_join1.q.out +++ ql/src/test/results/clientpositive/spark/auto_join1.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -46,6 +46,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +70,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) @@ -82,8 +84,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -96,7 +125,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/spark/auto_join14.q.out ql/src/test/results/clientpositive/spark/auto_join14.q.out index 82deefea73..9b624b9998 100644 --- ql/src/test/results/clientpositive/spark/auto_join14.q.out +++ ql/src/test/results/clientpositive/spark/auto_join14.q.out @@ -46,6 +46,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -82,8 +84,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort 
order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -96,7 +125,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value diff --git ql/src/test/results/clientpositive/spark/auto_join17.q.out ql/src/test/results/clientpositive/spark/auto_join17.q.out index 6d63fa68a3..0283ad284b 100644 --- ql/src/test/results/clientpositive/spark/auto_join17.q.out +++ ql/src/test/results/clientpositive/spark/auto_join17.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -46,6 +46,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +70,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) @@ -82,8 +84,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -96,7 +125,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* diff --git ql/src/test/results/clientpositive/spark/auto_join19.q.out ql/src/test/results/clientpositive/spark/auto_join19.q.out index 88ef3f1981..a7e2bf5d1b 100644 --- ql/src/test/results/clientpositive/spark/auto_join19.q.out +++ ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -27,7 +27,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -48,6 +48,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -70,7 +72,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col4 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) @@ -84,8 +86,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,7 +127,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/spark/auto_join2.q.out ql/src/test/results/clientpositive/spark/auto_join2.q.out index e32abba2f4..1b294d443f 100644 --- ql/src/test/results/clientpositive/spark/auto_join2.q.out +++ 
ql/src/test/results/clientpositive/spark/auto_join2.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -43,7 +43,7 @@ STAGE PLANS: 1 _col0 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -64,6 +64,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -86,7 +88,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -96,7 +98,7 @@ STAGE PLANS: 1 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col3 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) @@ -110,8 +112,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -124,7 +153,12 @@ STAGE PLANS: name: default.dest_j2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j2 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value diff --git ql/src/test/results/clientpositive/spark/auto_join26.q.out ql/src/test/results/clientpositive/spark/auto_join26.q.out index bfb3564cf8..723ee0ee11 100644 --- ql/src/test/results/clientpositive/spark/auto_join26.q.out +++ ql/src/test/results/clientpositive/spark/auto_join26.q.out @@ -48,6 +48,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -106,6 +107,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 
(type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -118,7 +146,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key diff --git ql/src/test/results/clientpositive/spark/auto_join3.q.out ql/src/test/results/clientpositive/spark/auto_join3.q.out index a17cc1a0a1..fd463d38d8 100644 --- ql/src/test/results/clientpositive/spark/auto_join3.q.out +++ ql/src/test/results/clientpositive/spark/auto_join3.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -44,7 +44,7 @@ STAGE PLANS: 2 _col0 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -66,6 +66,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -90,8 +92,8 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 2 - 2 Map 3 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) @@ -105,8 +107,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -119,7 +148,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value diff --git ql/src/test/results/clientpositive/spark/auto_join4.q.out ql/src/test/results/clientpositive/spark/auto_join4.q.out index ca71d9f2ed..f060a04d52 100644 --- ql/src/test/results/clientpositive/spark/auto_join4.q.out +++ ql/src/test/results/clientpositive/spark/auto_join4.q.out @@ -47,7 +47,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -68,6 +68,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -90,7 +92,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) @@ -104,8 +106,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -118,7 +147,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column 
Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/auto_join5.q.out ql/src/test/results/clientpositive/spark/auto_join5.q.out index 2b11a96767..7ee7598cd8 100644 --- ql/src/test/results/clientpositive/spark/auto_join5.q.out +++ ql/src/test/results/clientpositive/spark/auto_join5.q.out @@ -68,6 +68,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -104,8 +106,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -118,7 +147,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/auto_join6.q.out ql/src/test/results/clientpositive/spark/auto_join6.q.out index 4f1e6316b8..f8c746af59 100644 --- ql/src/test/results/clientpositive/spark/auto_join6.q.out +++ ql/src/test/results/clientpositive/spark/auto_join6.q.out @@ -45,7 +45,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +67,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +107,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + 
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -118,7 +146,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/auto_join7.q.out ql/src/test/results/clientpositive/spark/auto_join7.q.out index 0e6e2f5b73..380f31a58f 100644 --- ql/src/test/results/clientpositive/spark/auto_join7.q.out +++ ql/src/test/results/clientpositive/spark/auto_join7.q.out @@ -55,7 +55,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -76,7 +77,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -94,7 +95,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src3 @@ -136,6 +137,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -148,7 +176,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/auto_join8.q.out ql/src/test/results/clientpositive/spark/auto_join8.q.out index c0795366f5..b0df0f5ace 100644 --- ql/src/test/results/clientpositive/spark/auto_join8.q.out +++ ql/src/test/results/clientpositive/spark/auto_join8.q.out @@ -47,7 +47,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -68,6 +68,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -90,7 +92,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col2 is null (type: boolean) @@ -107,8 +109,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num 
rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -121,7 +150,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/auto_join9.q.out ql/src/test/results/clientpositive/spark/auto_join9.q.out index b2ed51c108..ad32985ace 100644 --- ql/src/test/results/clientpositive/spark/auto_join9.q.out +++ ql/src/test/results/clientpositive/spark/auto_join9.q.out @@ -25,7 +25,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -46,6 +46,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +70,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col4 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) @@ -82,8 +84,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -96,7 +125,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' diff --git ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out index 
ff94451963..5efde9884c 100644 --- ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -126,7 +126,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -147,7 +147,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -199,7 +199,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -220,7 +220,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -272,7 +272,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -293,7 +293,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -345,7 +345,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -366,7 +366,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -418,7 +418,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid @@ -439,7 +439,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid diff --git ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index c26949f13c..5a217fa5e4 100644 --- ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -1333,9 +1333,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -1371,6 +1374,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE @@ -1379,6 +1395,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 
_col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1391,7 +1472,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -1404,7 +1490,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 PREHOOK: query: from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -1532,10 +1623,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: a @@ -1564,6 +1657,39 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: 
Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE @@ -1582,6 +1708,20 @@ STAGE PLANS: Reducer 2 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial @@ -1599,6 +1739,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1611,7 +1778,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -1624,7 +1796,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 PREHOOK: query: from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key diff --git 
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out index cd4b83a2a1..67a1267136 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out @@ -76,9 +76,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -110,6 +113,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -122,6 +138,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -134,7 +215,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -147,7 +233,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -261,9 +352,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -295,6 +389,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -307,6 +414,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -319,7 +491,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -332,7 +509,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -438,7 +620,8 @@ INSERT OVERWRITE TABLE dest2 select value1, value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 @@ -449,7 +632,30 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-6 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: b @@ -470,9 
+676,12 @@ STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: a @@ -492,7 +701,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -506,6 +715,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -520,6 +742,75 @@ STAGE PLANS: name: default.dest2 Local Work: Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Local Work: + Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -532,7 +823,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -545,7 +841,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -581,28 +882,6 @@ POSTHOOK: query: select * from dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -2 2 -4 4 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -8 8 -9 9 PREHOOK: query: select * from dest2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -611,25 +890,3 @@ POSTHOOK: query: select * from dest2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_2 val_2 -val_4 val_4 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_8 val_8 -val_9 val_9 diff --git ql/src/test/results/clientpositive/spark/bucket2.q.out ql/src/test/results/clientpositive/spark/bucket2.q.out index 90c9e5469d..98558e451e 100644 --- ql/src/test/results/clientpositive/spark/bucket2.q.out +++ ql/src/test/results/clientpositive/spark/bucket2.q.out @@ -135,6 +135,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 
1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -166,8 +201,14 @@ STAGE PLANS: name: default.bucket2_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket2_1 select * from src diff --git ql/src/test/results/clientpositive/spark/bucket3.q.out ql/src/test/results/clientpositive/spark/bucket3.q.out index 078460f9b9..1c155f04cc 100644 --- ql/src/test/results/clientpositive/spark/bucket3.q.out +++ ql/src/test/results/clientpositive/spark/bucket3.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -133,6 +134,60 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -163,8 +218,14 @@ STAGE PLANS: name: default.bucket3_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + 
Table: default.bucket3_1 + Is Table Level Stats: false PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src diff --git ql/src/test/results/clientpositive/spark/bucket4.q.out ql/src/test/results/clientpositive/spark/bucket4.q.out index 13e21b6610..fd69c4bc87 100644 --- ql/src/test/results/clientpositive/spark/bucket4.q.out +++ ql/src/test/results/clientpositive/spark/bucket4.q.out @@ -137,6 +137,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -169,8 +204,14 @@ STAGE PLANS: name: default.bucket4_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket4_1 + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket4_1 select * from src diff --git ql/src/test/results/clientpositive/spark/bucket5.q.out ql/src/test/results/clientpositive/spark/bucket5.q.out index dd24db8e3e..6803bb182f 100644 --- ql/src/test/results/clientpositive/spark/bucket5.q.out +++ ql/src/test/results/clientpositive/spark/bucket5.q.out @@ -35,11 +35,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -109,7 +111,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -220,9 +222,55 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: Select Operator expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -259,6 +307,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 5 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -291,8 +385,14 @@ STAGE PLANS: name: default.bucketed_table Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + 
Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucketed_table + Is Table Level Stats: true Stage: Stage-1 Move Operator @@ -323,8 +423,14 @@ STAGE PLANS: name: default.unbucketed_table Stage: Stage-4 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.unbucketed_table + Is Table Level Stats: true PREHOOK: query: FROM src INSERT OVERWRITE TABLE bucketed_table SELECT key, value @@ -362,7 +468,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} SORTBUCKETCOLSPREFIX TRUE numFiles 2 numRows 500 diff --git ql/src/test/results/clientpositive/spark/bucket6.q.out ql/src/test/results/clientpositive/spark/bucket6.q.out index d5d53d303d..5deea3e5cc 100644 --- ql/src/test/results/clientpositive/spark/bucket6.q.out +++ ql/src/test/results/clientpositive/spark/bucket6.q.out @@ -22,6 +22,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -53,6 +54,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -65,7 +93,12 @@ STAGE PLANS: name: default.src_bucket Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_bucket PREHOOK: query: insert into table src_bucket select key,value from srcpart PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out index 194f16ed9c..53a98a4926 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out @@ -127,7 +127,7 @@ STAGE PLANS: Spark #### A masked pattern was 
here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -209,6 +209,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -233,7 +235,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -273,6 +275,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -333,6 +351,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [$hdt$_0:a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -363,8 +411,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value @@ -434,7 +488,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -511,6 +565,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -535,7 +591,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 
Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -553,7 +609,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -574,6 +630,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -629,6 +701,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [$hdt$_0:a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -639,7 +741,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -659,8 +761,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, 
a.value, b.value diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out index bb66d1ec47..45cf157f3e 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out @@ -111,7 +111,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -193,6 +193,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -217,7 +219,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -257,6 +259,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -317,6 +335,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [$hdt$_0:a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -347,8 +395,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value 
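
The hunks above and below repeat one pattern: each INSERT plan now feeds a Select Operator / Group By Operator pair computing compute_stats(col, 'hll') into a Stats Work stage, and the table's COLUMN_STATS_ACCURATE property gains a per-column map next to BASIC_STATS, e.g. {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}. As a minimal sketch of consuming that property shape -- assuming only the JSON layout visible in these golden files, not Hive's actual parser, which is what this patch exercises -- the following self-contained Java checks whether basic stats and a given column's stats are marked accurate:

    // Illustrative sketch only; not Hive's implementation. Assumes the
    // COLUMN_STATS_ACCURATE value has the shape seen in these .q.out files:
    //   {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class ColumnStatsAccurateCheck {

      /** True if the property marks basic stats as accurate. */
      static boolean basicStatsAccurate(String prop) {
        return prop != null && prop.contains("\"BASIC_STATS\":\"true\"");
      }

      /** True if the property marks the named column's stats as accurate. */
      static boolean columnStatsAccurate(String prop, String col) {
        if (prop == null) {
          return false;
        }
        // Capture the body of the COLUMN_STATS object, then look for "col":"true".
        Matcher m = Pattern.compile("\"COLUMN_STATS\"\\s*:\\s*\\{([^}]*)\\}")
            .matcher(prop);
        return m.find() && m.group(1).contains("\"" + col + "\":\"true\"");
      }

      public static void main(String[] args) {
        String prop =
            "{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}";
        System.out.println(basicStatsAccurate(prop));          // true
        System.out.println(columnStatsAccurate(prop, "key"));  // true
        System.out.println(columnStatsAccurate(prop, "ds"));   // false
      }
    }

Running main prints true / true / false, mirroring the key and value columns recorded in the sample property while a column absent from the map (here a hypothetical ds) reports inaccurate stats.
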
@@ -418,7 +472,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -500,6 +554,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -524,7 +580,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -543,7 +599,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -564,6 +620,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -624,6 +696,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [$hdt$_0:a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -634,7 +736,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -654,8 +756,14 @@ STAGE PLANS: 
name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out index 10678a4c26..b2ef8bc5ea 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out @@ -193,6 +193,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -257,6 +259,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -317,6 +335,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [$hdt$_1:b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -347,8 +395,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table 
bucketmapjoin_tmp_result select a.key, a.value, b.value @@ -495,6 +549,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -537,7 +593,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -558,6 +614,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -613,6 +685,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [$hdt$_1:b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -623,7 +725,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -643,8 +745,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: 
true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out index 2eb5db1d20..b3f405dd29 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out @@ -111,7 +111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -134,7 +134,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -187,7 +187,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -210,7 +210,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -317,7 +317,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -340,7 +340,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -490,7 +490,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -513,7 +513,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -566,7 +566,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -589,7 +589,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -690,7 +690,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -713,7 +713,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out index 724f9829db..f97c5a097f 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out @@ -383,7 +383,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -460,7 +461,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [$hdt$_0:a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -583,6 +584,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE 
+#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -613,8 +660,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -738,7 +791,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -815,7 +869,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [$hdt$_0:a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -917,7 +971,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -938,6 +992,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + 
columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -948,7 +1048,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -968,8 +1068,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out index 9e44d1a68d..72a093e3bb 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out @@ -110,7 +110,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -205,7 +205,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name value column.name.delimiter , @@ -254,7 +254,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -409,7 +409,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -510,7 +510,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -670,7 +670,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -771,7 +771,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -931,7 +931,7 @@ STAGE PLANS: partition 
values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -1032,7 +1032,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out index 49d7cc9928..bfd00a25bb 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out @@ -109,7 +109,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -188,7 +189,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [$hdt$_0:a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -311,6 +312,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -341,8 +388,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, 
value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -470,7 +523,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -549,7 +603,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [$hdt$_0:a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -651,7 +705,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -672,6 +726,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -682,7 +782,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -702,8 +802,14 @@ STAGE PLANS: name: 
default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -848,7 +954,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -927,7 +1034,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [$hdt$_0:a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1078,7 +1185,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1099,6 +1206,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1109,7 +1262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1129,8 +1282,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out index 1db5b2c142..5a141d975f 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out @@ -133,7 +133,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -212,7 +213,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [$hdt$_0:a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -335,6 +336,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -365,8 +412,14 @@ STAGE PLANS: name: 
default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -494,7 +547,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -573,7 +627,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [$hdt$_0:a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -675,7 +729,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -696,6 +750,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -706,7 +806,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -726,8 +826,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out index 7fb61f0645..9d6f5f648d 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out @@ -133,7 +133,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -210,7 +211,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [$hdt$_0:a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -331,6 +332,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -361,8 +408,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - 
Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value @@ -478,7 +531,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -555,7 +609,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [$hdt$_0:a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -655,7 +709,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -676,6 +730,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -686,7 +786,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -706,8 +806,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out index a9415135fb..733c62d6c6 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out @@ -261,6 +261,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -321,6 +323,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -430,6 +448,36 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -460,8 +508,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: 
string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value @@ -667,6 +721,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -706,7 +762,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -727,6 +783,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -836,6 +908,36 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -846,7 +948,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -866,8 +968,14 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats 
Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out index 3e74c217e7..54318fd3e0 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out @@ -86,7 +86,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -159,6 +159,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -179,7 +181,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -218,6 +220,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -271,6 +289,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -301,6 +349,12 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column 
Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true diff --git ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out index a3acd5dd74..ab1fa61814 100644 --- ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out +++ ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out @@ -95,7 +95,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -222,6 +222,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -242,7 +244,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -282,6 +284,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value1, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -340,6 +358,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -370,6 +418,12 @@ STAGE PLANS: name: default.bucketmapjoin_tmp_result Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true diff --git 
ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out index 81a064b2b7..2126135875 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out @@ -122,6 +122,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -172,6 +173,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -186,7 +222,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -298,6 +339,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -348,6 +390,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -362,7 +439,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -498,6 +580,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -548,6 +631,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -562,7 +680,12 @@ STAGE PLANS: name: 
default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -704,6 +827,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -754,6 +878,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -768,7 +927,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -892,6 +1056,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -942,6 +1107,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) 
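Note: the pattern being added throughout this file is the two-phase column-stats pipeline: a map-side Group By Operator running compute_stats(..., 'hll') in hash mode, keyed by the partition column ds, shuffled over the new Reducer 4 <- Reducer 3 (GROUP, 1) edge to a single mergepartial Group By whose output the Stats Work stage persists. A closely related compute_stats plan shape can be produced on demand, independent of the autogather path; a minimal sketch, assuming the test_table3 table and ds = '1' partition created earlier in this q file:

    -- Explicit column-stats scan; EXPLAIN should show a similar
    -- compute_stats hash/mergepartial pipeline to the one added above.
    EXPLAIN
    ANALYZE TABLE test_table3 PARTITION (ds = '1')
    COMPUTE STATISTICS FOR COLUMNS;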
+ sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -956,7 +1156,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.v1, b.v2) @@ -1080,6 +1285,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1130,6 +1336,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1144,7 +1385,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator 
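Note: as in the earlier files, the single-line Stage-2 marker removed here (Stats-Aggr Operator) is replaced by the richer Stats Work block, which spells out Basic Stats Work plus a Column Stats Desc (columns, column types, and target table). Once such an insert runs, the gathered column statistics are recorded against the target table, which is what the COLUMN_STATS_ACCURATE property diffs elsewhere in this patch reflect. A quick manual check, as a hedged sketch against the test table used here:

    -- Table parameters, including COLUMN_STATS_ACCURATE:
    DESCRIBE FORMATTED default.test_table3;
    -- Per-column statistics for one column:
    DESCRIBE FORMATTED default.test_table3 key;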
+ Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key+a.key, concat(a.value, b.value) diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out index 4245aa1d99..e5165d9e64 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out @@ -94,6 +94,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -140,6 +141,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, key2, value + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -154,7 +190,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key, concat(a.value, b.value) @@ -285,6 +326,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -331,6 +373,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + 
outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -345,7 +422,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.value diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out index 5e4e5ef8ad..87e13cc4bb 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out @@ -100,6 +100,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -150,6 +151,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), 
compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -164,7 +200,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key2, concat(a.value, b.value) @@ -283,6 +324,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -333,6 +375,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -347,7 +424,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, 
string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key, subq1.key2, subq1.value from @@ -466,6 +548,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -516,6 +599,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -530,7 +648,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') @@ -584,6 +707,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -634,6 +758,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -648,7 +807,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') @@ -708,6 +872,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -758,6 +923,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -772,7 +972,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from @@ -909,6 +1114,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -959,6 +1165,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -973,7 +1214,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.k2, subq2.k1, subq2.value from @@ -1120,6 +1366,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1170,6 +1417,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: 
string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1184,5 +1466,10 @@ STAGE PLANS: name: default.test_table4 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table4 diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out index 7c23da7cc2..e3e2e5dfaa 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out @@ -100,6 +100,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -150,6 +151,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: 
KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -164,7 +200,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -280,6 +321,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -330,6 +372,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -344,7 +421,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -466,6 +548,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- 
Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -516,6 +599,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -530,7 +648,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out index 8263b6f681..418f040f35 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out @@ -98,6 +98,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -148,6 +149,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -162,7 +198,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, b.key, concat(a.value, b.value) @@ -275,6 +316,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -325,6 +367,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(key2, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -339,7 +416,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT b.key, a.key, concat(a.value, b.value) diff --git ql/src/test/results/clientpositive/spark/ctas.q.out ql/src/test/results/clientpositive/spark/ctas.q.out index 49699308e2..222e44be6a 100644 --- ql/src/test/results/clientpositive/spark/ctas.q.out +++ ql/src/test/results/clientpositive/spark/ctas.q.out @@ -94,7 +94,8 @@ STAGE PLANS: name: default.nzhang_CTAS1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT @@ -241,7 +242,8 @@ STAGE PLANS: name: default.nzhang_ctas2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_ctas2 as select * from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT @@ -388,7 +390,8 @@ STAGE PLANS: name: default.nzhang_ctas3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_ctas3 row format serde "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" stored as RCFile as select key/2 half_key, concat(value, "_con") conb from src sort by half_key, conb limit 10 PREHOOK: type: CREATETABLE_AS_SELECT @@ -600,7 +603,8 @@ STAGE PLANS: name: default.nzhang_ctas4 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_ctas4 row format delimited fields terminated by ',' stored as textfile as select key, value from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT @@ -751,7 +755,8 @@ STAGE PLANS: name: default.nzhang_ctas5 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table nzhang_ctas5 row format delimited fields terminated by ',' lines terminated by '\012' stored as textfile as select key, value from src sort by key, value limit 10 PREHOOK: type: CREATETABLE_AS_SELECT diff --git ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out index 8693026c71..988d29d5e2 100644 --- ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out +++ ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out @@ -135,6 +135,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + 
NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -166,8 +201,14 @@ STAGE PLANS: name: default.bucket2_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket2_1 select * from src diff --git ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out index 1ed388f0c7..fdc9749c90 100644 --- ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out +++ ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out @@ -162,11 +162,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 31) - Reducer 3 <- Map 4 (GROUP, 31) + Reducer 2 <- Map 6 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 31) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -207,9 +209,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -227,6 +256,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value 
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -239,7 +295,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -252,7 +313,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY @@ -320,8 +386,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -340,7 +407,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -356,7 +423,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -395,7 +462,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: 
Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -418,7 +512,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -441,6 +548,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -453,7 +573,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out index 718346f83a..8803c5db2e 100644 --- ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out +++ ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out @@ -73,7 +73,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -113,7 +113,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:f] Map 4 Map Operator Tree: TableScan @@ -148,7 +148,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -188,7 +188,7 @@ STAGE PLANS: 
name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:m] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:m] Map 5 Map Operator Tree: TableScan @@ -222,7 +222,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -262,7 +262,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g] Reducer 2 Needs Tagging: true Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/groupby1.q.out ql/src/test/results/clientpositive/spark/groupby1.q.out index 42ce2430d3..68cb6d7176 100644 --- ql/src/test/results/clientpositive/spark/groupby1.q.out +++ ql/src/test/results/clientpositive/spark/groupby1.q.out @@ -86,7 +86,8 @@ STAGE PLANS: name: default.dest_g1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/groupby10.q.out ql/src/test/results/clientpositive/spark/groupby10.q.out index b572995b32..69949ac2f9 100644 --- ql/src/test/results/clientpositive/spark/groupby10.q.out +++ ql/src/test/results/clientpositive/spark/groupby10.q.out @@ -51,13 +51,17 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: input @@ -105,9 +109,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), 
compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: int) mode: partial1 @@ -119,7 +157,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) - Reducer 5 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -139,6 +177,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -151,7 +223,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -164,7 +241,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -263,13 +345,17 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 
(GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: input @@ -317,9 +403,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: int) mode: partial1 @@ -331,7 +451,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) - Reducer 5 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -351,6 +471,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reducer 8 + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -363,7 +517,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -376,7 +535,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -475,7 +639,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -492,7 +659,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE @@ -514,6 +709,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: int) @@ -532,6 +735,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Stage: Stage-0 Move Operator @@ -544,7 +769,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -557,7 +787,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key diff --git ql/src/test/results/clientpositive/spark/groupby11.q.out ql/src/test/results/clientpositive/spark/groupby11.q.out index a0f99c4cfa..485f12cd2a 100644 --- ql/src/test/results/clientpositive/spark/groupby11.q.out +++ ql/src/test/results/clientpositive/spark/groupby11.q.out @@ -39,13 +39,17 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 2) Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 6 <- Map 11 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP 
PARTITION-LEVEL SORT, 2) + Reducer 9 <- Reducer 8 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: src @@ -59,7 +63,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 11 Map Operator Tree: TableScan alias: src @@ -107,9 +111,52 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: partial1 @@ -121,7 +168,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 5 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -141,6 +188,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -155,7 +245,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -170,7 +265,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest2 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 partition(ds='111') diff --git ql/src/test/results/clientpositive/spark/groupby1_map.q.out ql/src/test/results/clientpositive/spark/groupby1_map.q.out index b414aa62a3..fab32b3c16 100644 --- ql/src/test/results/clientpositive/spark/groupby1_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby1_map.q.out @@ -22,6 +22,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +66,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -77,7 +105,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out index a01cee1d49..764ee2e440 100644 --- ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out +++ ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out @@ -22,6 +22,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +66,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -77,7 +105,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out index 
f7b7f7a185..bbbef6ff5e 100644 --- ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out @@ -23,6 +23,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -80,6 +81,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -92,7 +120,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out index 1b7e53b3eb..c092cfccc2 100644 --- ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out @@ -22,6 +22,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -59,6 +60,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: double) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -71,7 +94,12 @@ STAGE PLANS: name: default.dest_g1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest_g1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/groupby2.q.out ql/src/test/results/clientpositive/spark/groupby2.q.out index a5cd0e6066..8fda529d9f 100644 --- ql/src/test/results/clientpositive/spark/groupby2.q.out +++ ql/src/test/results/clientpositive/spark/groupby2.q.out @@ -25,6 +25,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +77,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -87,7 +123,12 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby2_map.q.out ql/src/test/results/clientpositive/spark/groupby2_map.q.out index d2b69af851..b98f441dfc 
100644 --- ql/src/test/results/clientpositive/spark/groupby2_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby2_map.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +68,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,7 +107,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out index 4ad056887d..5ed3cc7a48 100644 --- ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +68,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,7 +107,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) @@ -132,6 +165,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -175,6 +209,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2160 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -187,7 +248,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: 
+ Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out index f4a567ea9b..4da02c8bd2 100644 --- ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out @@ -25,6 +25,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -82,6 +83,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,7 +122,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out index 8ecf769b43..8337ff0ba4 100644 --- ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,6 +61,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: 
_col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -72,7 +95,12 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out index 3ede0fc755..b465254b41 100644 --- ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -61,6 +62,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -73,7 +96,12 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-2 - 
Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby3.q.out ql/src/test/results/clientpositive/spark/groupby3.q.out index a34e89e1ef..6a4bfd1ede 100644 --- ql/src/test/results/clientpositive/spark/groupby3.q.out +++ ql/src/test/results/clientpositive/spark/groupby3.q.out @@ -43,6 +43,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +69,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reducer 3 @@ -89,6 +91,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -101,7 +130,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/spark/groupby3_map.q.out ql/src/test/results/clientpositive/spark/groupby3_map.q.out index 71f8dc0191..c283f1f07b 100644 --- ql/src/test/results/clientpositive/spark/groupby3_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_map.q.out @@ -83,6 +83,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,7 +115,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out index 47ef5cb6ae..505c639c8b 100644 --- ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out @@ -87,6 +87,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), 
_col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -99,7 +119,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out index 7cfca81d71..dd1103af7c 100644 --- ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out @@ -96,6 +96,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: 
struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -108,7 +128,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out index b2993a6e85..07206e9ad1 100644 --- ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out @@ -76,6 +76,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4128 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -88,7 +104,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out index d152a07c77..301978285d 100644 --- ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out @@ -80,6 +80,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: 
double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5008 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -92,7 +108,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT diff --git ql/src/test/results/clientpositive/spark/groupby4.q.out ql/src/test/results/clientpositive/spark/groupby4.q.out index 3ad01d0d70..55b08ff61c 100644 --- ql/src/test/results/clientpositive/spark/groupby4.q.out +++ ql/src/test/results/clientpositive/spark/groupby4.q.out @@ -25,6 +25,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +70,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -80,7 +116,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby4_map.q.out ql/src/test/results/clientpositive/spark/groupby4_map.q.out index 7cb360033e..cf93ed9c3a 100644 --- ql/src/test/results/clientpositive/spark/groupby4_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby4_map.q.out @@ -59,6 +59,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -71,7 +91,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out index ef287ad05a..3f894d9ede 100644 --- ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out @@ -59,6 +59,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -71,7 +91,12 @@ STAGE PLANS: name: default.dest1 
Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out index 04f58fa671..46e0fb5cfc 100644 --- ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -55,6 +56,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,7 +90,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby5.q.out ql/src/test/results/clientpositive/spark/groupby5.q.out index d292f747e6..6f2723dd71 100644 --- ql/src/test/results/clientpositive/spark/groupby5.q.out +++ ql/src/test/results/clientpositive/spark/groupby5.q.out @@ -29,6 +29,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -80,6 +82,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -92,7 +128,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) diff --git ql/src/test/results/clientpositive/spark/groupby5_map.q.out ql/src/test/results/clientpositive/spark/groupby5_map.q.out index add30941b2..1a7e0bdf1a 100644 --- ql/src/test/results/clientpositive/spark/groupby5_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby5_map.q.out @@ -61,6 +61,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -73,7 +93,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out index 924ef5dbc7..7f13bda36d 100644 --- ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out @@ -61,6 +61,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -73,7 +93,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out index 300ccb64c3..27a45a1c7d 100644 --- ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out @@ -28,6 +28,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +66,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -77,7 +100,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) diff --git ql/src/test/results/clientpositive/spark/groupby6.q.out ql/src/test/results/clientpositive/spark/groupby6.q.out index 4f406d7d07..2a61ce52c2 100644 --- ql/src/test/results/clientpositive/spark/groupby6.q.out +++ ql/src/test/results/clientpositive/spark/groupby6.q.out @@ -25,6 +25,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +70,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + 
expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -80,7 +116,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) diff --git ql/src/test/results/clientpositive/spark/groupby6_map.q.out ql/src/test/results/clientpositive/spark/groupby6_map.q.out index 03f68c63b2..626986c14d 100644 --- ql/src/test/results/clientpositive/spark/groupby6_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby6_map.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,6 +61,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -72,7 +100,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: 
string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) diff --git ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out index 606b5d5272..c235228023 100644 --- ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out @@ -25,6 +25,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +74,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -85,7 +113,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) diff --git ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out index eb72f011b2..210343f84f 100644 --- ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -55,6 +56,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column 
stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,7 +90,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) diff --git ql/src/test/results/clientpositive/spark/groupby7_map.q.out ql/src/test/results/clientpositive/spark/groupby7_map.q.out index 31daab8acc..76236e4c1e 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map.q.out @@ -35,11 +35,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 31) - Reducer 3 <- Map 4 (GROUP, 31) + Reducer 2 <- Map 6 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 31) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -80,9 +82,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -100,6 +129,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -112,7 +168,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -125,7 +186,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out index 625a7374c9..dff2bd9ab3 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out @@ -35,7 +35,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -53,7 +56,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: substr(value, 5) (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -75,6 +106,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -93,6 +137,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -105,7 +176,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -118,7 +194,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out index 4fbfd30ec6..9e1f494e4d 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out @@ -35,12 +35,14 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 5 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Reducer 2 (GROUP, 31) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan alias: src @@ -95,7 +97,34 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -115,6 +144,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -127,7 +183,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -140,7 +201,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out index 
a26247af8d..83649352f1 100644 --- ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out @@ -35,11 +35,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 31) - Reducer 3 <- Map 4 (GROUP, 31) + Reducer 2 <- Map 6 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 31) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -74,9 +76,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: complete @@ -94,6 +118,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -106,7 +152,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -119,7 +170,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, 
sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out index 2dce3016b3..b40b4cc3fb 100644 --- ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out @@ -35,9 +35,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Reducer 5 (SORT, 1) - Reducer 4 <- Reducer 5 (SORT, 1) - Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 7 (SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Reducer 7 (SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 7 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -76,8 +78,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Reducer 4 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1 @@ -97,7 +121,29 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -125,7 +171,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - 
Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -138,7 +189,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 diff --git ql/src/test/results/clientpositive/spark/groupby8.q.out ql/src/test/results/clientpositive/spark/groupby8.q.out index f7f3279e6b..2c46e93d7e 100644 --- ql/src/test/results/clientpositive/spark/groupby8.q.out +++ ql/src/test/results/clientpositive/spark/groupby8.q.out @@ -35,12 +35,16 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 5 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 7 <- Reducer 2 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: src @@ -88,9 +92,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 5 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: final @@ -108,6 +146,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce 
partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,7 +192,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -133,7 +210,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -808,12 +890,16 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 5 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 7 <- Reducer 2 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: src @@ -861,9 +947,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 5 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: final @@ -881,6 +1001,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -893,7 +1047,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -906,7 +1065,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/spark/groupby8_map.q.out ql/src/test/results/clientpositive/spark/groupby8_map.q.out index 288ca3f3b2..afc5408d84 100644 --- ql/src/test/results/clientpositive/spark/groupby8_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_map.q.out @@ -35,7 +35,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ 
-52,7 +55,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -74,6 +105,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -92,6 +136,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -104,7 +175,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -117,7 +193,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out index 9e76fd57aa..f155da09b9 100644 --- ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out @@ -35,12 +35,14 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 5 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Reducer 2 (GROUP, 31) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan alias: src @@ -94,7 +96,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -114,6 +143,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 872 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 888 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -126,7 +182,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -139,7 +200,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out index 288ca3f3b2..1a0feba79b 100644 --- ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out @@ -35,7 +35,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +55,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -74,6 +105,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -92,6 +131,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) Stage: Stage-0 Move Operator @@ -104,7 +165,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -117,7 +183,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/spark/groupby9.q.out ql/src/test/results/clientpositive/spark/groupby9.q.out index d59d8cf706..68b059172e 100644 --- ql/src/test/results/clientpositive/spark/groupby9.q.out +++ ql/src/test/results/clientpositive/spark/groupby9.q.out @@ -35,11 +35,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -59,7 +61,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: 
src @@ -99,9 +101,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -119,6 +148,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -131,7 +187,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -144,7 +205,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 
PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -820,11 +886,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -844,7 +912,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -884,9 +952,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -904,6 +999,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -916,7 +1038,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -929,7 +1056,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -1605,11 +1737,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -1629,7 +1763,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -1669,9 +1803,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -1689,6 +1850,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1701,7 +1889,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -1714,7 +1907,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -2390,11 +2588,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -2415,7 +2615,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -2456,9 +2656,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -2476,6 +2703,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2488,7 +2742,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -2501,7 +2760,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -3177,11 +3441,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -3201,7 +3467,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -3241,9 +3507,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + 
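
Once the Stats Work stage has run, both kinds of statistics are visible from the metastore, which is a quick way to verify these plans end to end. A sketch, assuming the dest1 table from the tests above:

    -- basic stats (numRows, rawDataSize, totalSize) land in the table parameters:
    DESCRIBE FORMATTED dest1;
    -- column stats gathered by compute_stats (NDV, null count, lengths), per column:
    DESCRIBE FORMATTED dest1 key;
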
Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -3261,6 +3554,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -3273,7 +3593,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -3286,7 +3611,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out index 169ee04403..3b5ec42472 100644 --- ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out 
+++ ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out @@ -175,7 +175,8 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-1 Move Operator @@ -188,7 +189,8 @@ STAGE PLANS: name: default.dest2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-2 Move Operator @@ -201,7 +203,8 @@ STAGE PLANS: name: default.dest3 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out index 7a730d230d..71e74a92ae 100644 --- ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out @@ -159,7 +159,8 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: Stage: Stage-1 Move Operator @@ -172,7 +173,8 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key) as keyarray, COUNT(1) GROUP BY ARRAY(SRC.key) ORDER BY keyarray limit 10 diff --git ql/src/test/results/clientpositive/spark/groupby_cube1.q.out ql/src/test/results/clientpositive/spark/groupby_cube1.q.out index 52c87efaf0..e01877aaf1 100644 --- ql/src/test/results/clientpositive/spark/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_cube1.q.out @@ -574,13 +574,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan alias: t1 @@ -601,7 +603,7 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Map 7 + Map 9 Map Operator Tree: TableScan alias: t1 @@ -657,9 +659,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: 
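
The groupby_complex_types hunks above are the counterexample: their Stats Work stages gain only "Basic Stats Work:" and no Column Stats Desc, because Hive does not collect column statistics for complex-typed columns, so autogather leaves those plans alone apart from the stage rename. A sketch of the shape involved; the DDL here is hypothetical, inferred from the quoted query:

    -- ARRAY-typed columns have no column-stats representation, so only basic
    -- stats can be gathered for a target like this:
    CREATE TABLE dest1 (key ARRAY<STRING>, cnt BIGINT);
    SET hive.stats.column.autogather=true;
    EXPLAIN
    FROM src
    INSERT OVERWRITE TABLE dest1
      SELECT ARRAY(src.key), COUNT(1) GROUP BY ARRAY(src.key);
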
compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials @@ -671,7 +700,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -692,6 +721,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -704,7 +760,12 @@ STAGE PLANS: name: default.t2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 Stage: Stage-1 Move Operator @@ -717,7 +778,12 @@ STAGE PLANS: name: default.t3 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube diff --git ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out index f3a95bdde5..400882b623 100644 --- ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out +++ ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out @@ -30,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL 
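
Note the second argument to compute_stats differs between files: the groupby_cube1 plans use compute_stats(key1, 'hll'), while most of the others use compute_stats(key, 16). The string form selects the NDV estimator (HyperLogLog here), whereas the bare number is the legacy Flajolet-Martin form, 16 being the number of FM bit-vectors. If the UDAF is invocable directly in your session (an assumption; it is normally planted by the optimizer), the two variants can be compared side by side:

    SELECT compute_stats(key, 'hll') FROM src;  -- HLL-based NDV sketch
    SELECT compute_stats(key, 16) FROM src;     -- FM sketch with 16 bit-vectors
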
SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -204,6 +205,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -234,8 +281,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out index 666f355a4c..dddd334261 100644 --- ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out @@ -30,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -204,6 +205,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + 
Statistics: Num rows: 1 Data size: 2168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2168 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2216 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2216 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -234,8 +281,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out index bf4132a974..5777736041 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out @@ -35,11 +35,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -59,7 +61,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -99,9 +101,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + 
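
The groupby_map_ppr plans above read the partitioned srcpart table but write into an unpartitioned dest1, so their Stats Work carries "Is Table Level Stats: true": the merged column stats describe the whole table rather than individual partitions. The per-partition variant is what a manual ANALYZE produces; a sketch using the standard q-test partition values:

    ANALYZE TABLE srcpart PARTITION (ds='2008-04-08', hr='11')
      COMPUTE STATISTICS FOR COLUMNS;
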
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: double) mode: mergepartial @@ -119,6 +148,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -131,7 +187,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -144,7 +205,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 PREHOOK: query: from src insert overwrite table dest1 select key, count(distinct value) group by key diff --git ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out index c16df1b667..98daf142bb 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out @@ -63,7 +63,12 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 6 (GROUP, 1) + Reducer 4 <- Reducer 7 (GROUP, 1) + Reducer 5 <- 
Reducer 8 (GROUP, 1) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -81,7 +86,49 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -103,6 +150,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE 
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -145,6 +205,66 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Reducer 7 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 8 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), 
compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Stage: Stage-2 Move Operator @@ -157,7 +277,12 @@ STAGE PLANS: name: default.dest_g4 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 Stage: Stage-0 Move Operator @@ -170,7 +295,12 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 Stage: Stage-1 Move Operator @@ -183,7 +313,12 @@ STAGE PLANS: name: default.dest_g3 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) @@ -295,12 +430,20 @@ STAGE PLANS: Stage: Stage-5 Spark Edges: - Reducer 2 <- Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 12 <- Map 10 (PARTITION-LEVEL SORT, 2) + Reducer 13 <- Map 10 (PARTITION-LEVEL SORT, 2) + Reducer 14 <- Map 10 (PARTITION-LEVEL SORT, 2) + Reducer 15 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 16 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 12 (GROUP, 1) + Reducer 4 <- Reducer 13 (GROUP, 1) + Reducer 5 <- Reducer 14 (GROUP, 1) + Reducer 7 <- Reducer 15 (SORT, 1) + Reducer 8 <- Reducer 7 (GROUP, 1) + Reducer 9 <- Reducer 16 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 Map Operator Tree: TableScan alias: src @@ -315,7 +458,7 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 6 + Map 11 Map Operator Tree: TableScan alias: src @@ -330,7 +473,7 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 12 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -352,6 +495,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), 
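
The groupby_multi_single_reducer plans show the pattern at scale: one TableScan fans out to one data reducer per INSERT branch (Reducers 6, 7, 8, each repeating the Forward/Filter tree with its own predicate), and every branch then grows its own compute_stats tail into a dedicated stats reducer (Reducers 3, 4, 5). The first branch is quoted verbatim in the PREHOOK line above; the sketch below pairs it with a mirrored < 5 branch for dest_g3, inferred from that reducer's Filter predicate, to show how a two-target version would plan:

    SET hive.stats.column.autogather=true;
    EXPLAIN
    FROM src
    INSERT OVERWRITE TABLE dest_g2
      SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)),
             concat(substr(src.key,1,1), sum(substr(src.value,5))),
             sum(DISTINCT substr(src.value,5)), count(src.value)
      WHERE substr(src.key,1,1) >= 5
      GROUP BY substr(src.key,1,1)
    INSERT OVERWRITE TABLE dest_g3
      SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)),
             concat(substr(src.key,1,1), sum(substr(src.value,5))),
             sum(DISTINCT substr(src.value,5)), count(src.value)
      WHERE substr(src.key,1,1) < 5
      GROUP BY substr(src.key,1,1);
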
compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -394,7 +550,67 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Reducer 3 + Reducer 13 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 14 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: 
string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 15 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -412,7 +628,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) @@ -435,8 +650,80 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 + Reducer 16 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -456,6 +743,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -468,7 +796,12 @@ STAGE PLANS: name: default.dest_g4 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 Stage: Stage-0 Move Operator @@ -481,7 +814,12 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 Stage: Stage-1 Move Operator @@ -494,7 +832,12 @@ STAGE PLANS: name: default.dest_g3 Stage: Stage-8 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 Stage: Stage-3 Move Operator @@ -507,7 +850,12 @@ STAGE PLANS: name: default.dest_h2 Stage: Stage-9 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h2 Stage: Stage-4 Move Operator @@ -520,7 +868,12 @@ STAGE PLANS: name: default.dest_h3 Stage: Stage-10 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h3 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out index 37deb9336f..d29f6f038b 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out @@ -35,7 +35,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +55,35 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + 
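
For comparison, disabling the new default restores the pre-patch plans: the extra stats reducers disappear and the follow-up stage reverts to basic stats only. A sketch using the two-column dest_g2 query from groupby_multi_single_reducer2:

    SET hive.stats.column.autogather=false;
    EXPLAIN
    FROM src
    INSERT OVERWRITE TABLE dest_g2
      SELECT substr(src.key,1,1), count(DISTINCT src.key)
      WHERE substr(src.key,1,1) >= 5
      GROUP BY substr(src.key,1,1);
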
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE @@ -77,6 +108,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +142,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, 
_col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Stage: Stage-0 Move Operator @@ -110,7 +184,12 @@ STAGE PLANS: name: default.dest_g2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_g2 Stage: Stage-1 Move Operator @@ -123,7 +202,12 @@ STAGE PLANS: name: default.dest_g3 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, int + Table: default.dest_g3 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) diff --git ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out index b6127475ea..cd51af5944 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out @@ -47,7 +47,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -64,7 +67,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -89,6 +120,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -110,6 +154,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -122,7 +196,12 @@ STAGE PLANS: name: default.e1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 Stage: Stage-1 Move Operator @@ -135,7 +214,12 @@ STAGE PLANS: name: default.e2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 PREHOOK: query: from src insert overwrite table e1 @@ -219,7 +303,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -236,7 +323,35 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 
972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -261,6 +376,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -282,6 +410,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ 
-294,7 +452,12 @@ STAGE PLANS: name: default.e1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 Stage: Stage-1 Move Operator @@ -307,7 +470,12 @@ STAGE PLANS: name: default.e2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 PREHOOK: query: from src insert overwrite table e1 @@ -391,7 +559,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -408,7 +579,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -433,6 +632,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -454,6 +666,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + 
Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -466,7 +708,12 @@ STAGE PLANS: name: default.e1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 Stage: Stage-1 Move Operator @@ -479,7 +726,12 @@ STAGE PLANS: name: default.e2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 PREHOOK: query: from src insert overwrite table e1 @@ -563,7 +815,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -580,7 +835,35 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -605,6 +888,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -626,6 +922,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -638,7 +964,12 @@ STAGE PLANS: name: default.e1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 Stage: Stage-1 Move Operator @@ -651,7 +982,12 @@ STAGE PLANS: name: default.e2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 PREHOOK: query: from src insert overwrite table e1 diff --git ql/src/test/results/clientpositive/spark/groupby_position.q.out ql/src/test/results/clientpositive/spark/groupby_position.q.out index ae27cc1a87..9608d3a9dc 100644 --- ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ 
ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -35,11 +35,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -58,7 +60,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -97,9 +99,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -117,6 +146,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -129,7 +185,12 @@ STAGE PLANS: name: default.testtable1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 Stage: Stage-1 Move Operator @@ -142,7 +203,12 @@ STAGE PLANS: name: default.testtable2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 @@ -226,11 +292,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -249,7 +317,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -288,9 +356,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -308,6 +403,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + 
Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -320,7 +442,12 @@ STAGE PLANS: name: default.testtable1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 Stage: Stage-1 Move Operator @@ -333,7 +460,12 @@ STAGE PLANS: name: default.testtable2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 diff --git ql/src/test/results/clientpositive/spark/groupby_ppr.q.out ql/src/test/results/clientpositive/spark/groupby_ppr.q.out index e45b5c2030..35f5fe8f2b 100644 --- ql/src/test/results/clientpositive/spark/groupby_ppr.q.out +++ ql/src/test/results/clientpositive/spark/groupby_ppr.q.out @@ -30,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -197,6 +198,47 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns 
_col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -227,8 +269,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out index c3c97f5a10..8e68aeaeaf 100644 --- ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out @@ -30,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -197,6 +198,47 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -227,8 +269,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: 
true PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 @@ -297,6 +345,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -443,7 +492,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -464,6 +513,51 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll'), compute_stats(VALUE._col2, 'hll'), compute_stats(VALUE._col3, 'hll'), compute_stats(VALUE._col4, 'hll'), compute_stats(VALUE._col5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2232 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2232 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -474,7 +568,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -494,8 +588,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, 
int + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out index 68670ab2fc..0a487c972f 100644 --- ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out @@ -407,13 +407,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan alias: t1 @@ -434,7 +436,7 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Map 7 + Map 9 Map Operator Tree: TableScan alias: t1 @@ -490,9 +492,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials @@ -504,7 +533,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -525,6 +554,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 
(type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(key2, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -537,7 +593,12 @@ STAGE PLANS: name: default.t2 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 Stage: Stage-1 Move Operator @@ -550,7 +611,12 @@ STAGE PLANS: name: default.t3 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup diff --git ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 8292e3a6b4..00302da281 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,6 +106,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -159,6 +177,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked 
pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -189,8 +237,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key @@ -243,6 +297,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -370,6 +425,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -400,8 +501,14 @@ STAGE PLANS: name: default.outputtbl2 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, 
int + Table: default.outputtbl2 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val @@ -446,6 +553,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -479,7 +588,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -500,6 +609,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -555,6 +680,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -565,7 +720,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -585,8 +740,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key @@ -629,6 +790,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 
Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -662,7 +825,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -683,6 +846,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -738,6 +917,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -748,7 +957,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -768,8 +977,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k @@ -820,6 +1035,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -874,6 +1091,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 
_col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -929,6 +1162,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -959,8 +1222,14 @@ STAGE PLANS: name: default.outputtbl3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key @@ -1014,6 +1283,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1141,6 +1411,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1171,8 +1487,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val @@ -1220,6 +1542,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1326,7 +1649,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1347,6 +1670,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1357,7 +1726,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1377,8 +1746,14 @@ STAGE PLANS: name: default.outputtbl3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 @@ -1428,6 +1803,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1544,7 +1920,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1565,6 +1941,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1575,7 +1997,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1595,8 +2017,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from @@ -1651,6 +2079,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1684,7 +2114,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1705,6 +2135,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1760,113 +2206,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1] - Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [null-subquery2:$hdt$_0-subquery2:t1] + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1877,7 +2246,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1897,8 +2266,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( @@ -1929,14 +2304,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### 1 1 -1 1 -2 1 2 1 3 1 -3 1 7 1 -7 1 -8 2 8 2 PREHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 @@ -1963,7 +2333,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1997,7 +2368,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2005,19 +2376,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2073,7 +2460,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1] - Map 2 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -2153,7 +2540,37 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] - Reducer 3 + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE 
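Note on the plan changes above: every INSERT OVERWRITE in these golden files now grows a column-stats branch beside the original file sink. A Select Operator re-projects the inserted columns under their target names, a map-side Group By Operator runs compute_stats(col, 16) in hash mode, a single reducer merges the partials (mode: mergepartial), and the old "Stats-Aggr Operator" stage becomes a "Stats Work" stage whose Column Stats Desc lists exactly which columns get recorded. A minimal sketch of what that branch computes, as a hypothetical manual session against the same test tables (T1, outputtbl1); the literal 16 is the bit-vector count the operator trees pass for NDV estimation:

    -- Insert as in the tests; the rewritten plan gathers column stats
    -- for key and cnt as a side effect of the write.
    INSERT OVERWRITE TABLE outputtbl1
    SELECT key, count(1) FROM T1 GROUP BY key;

    -- The stats branch is roughly equivalent to aggregating compute_stats
    -- over the inserted rows; each call yields one summary struct per column.
    SELECT compute_stats(key, 16), compute_stats(cnt, 16) FROM outputtbl1;

    -- The merged result should then be visible as column-level stats without
    -- a separate ANALYZE TABLE ... COMPUTE STATISTICS FOR COLUMNS run.
    DESCRIBE FORMATTED outputtbl1 key;

The merge reducer runs with parallelism 1 (GROUP, 1) because the per-mapper partial structs have to collapse into a single row per target table before the Stats Work stage can publish them.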
+#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2181,7 +2598,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2189,19 +2606,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Stage: Stage-0 Move Operator @@ -2212,7 +2645,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2220,20 +2653,26 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( @@ -2298,7 +2737,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 
3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2381,7 +2821,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: t1 @@ -2487,7 +2927,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2508,6 +2948,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2518,7 +3004,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2538,8 +3024,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM @@ -2856,6 +3348,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2893,7 +3386,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -2916,7 +3409,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -2963,7 +3456,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2984,6 +3477,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2994,7 +3533,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3014,8 +3553,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: 
default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key @@ -3058,6 +3603,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3091,7 +3638,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3112,6 +3659,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3121,7 +3684,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3144,7 +3707,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3167,6 +3730,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3177,7 +3770,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3197,8 +3790,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val @@ -3252,6 +3851,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3306,6 +3907,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3315,7 +3932,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3338,7 +3955,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3361,6 +3978,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3391,8 +4038,14 @@ STAGE PLANS: name: default.outputtbl5 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 @@ -3445,6 +4098,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3478,7 +4133,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3499,6 +4154,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3508,7 +4179,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3531,7 +4202,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 
bucket_field_name key @@ -3554,6 +4225,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3564,7 +4265,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3584,8 +4285,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from @@ -3645,6 +4352,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3678,7 +4387,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3699,6 +4408,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: 
struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3708,7 +4433,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3731,7 +4456,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3754,6 +4479,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3764,7 +4519,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3784,8 +4539,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from @@ -3862,10 +4623,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 5 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: t2 @@ -3909,6 +4672,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 6 + Map Operator 
Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -3929,6 +4724,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -3941,7 +4777,12 @@ STAGE PLANS: name: 
default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -3954,7 +4795,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key @@ -4023,10 +4869,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 5 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: t2 @@ -4073,6 +4921,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 6 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 8.0) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -4093,6 +4976,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + 
mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -4105,7 +5029,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 Stage: Stage-1 Move Operator @@ -4118,7 +5047,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key diff --git ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index 42847269f3..2fab361407 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,6 +106,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -159,6 +177,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 
Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -189,8 +237,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key @@ -244,6 +298,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -389,6 +444,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -419,8 +520,14 @@ STAGE PLANS: name: default.outputtbl2 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table 
Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val @@ -465,6 +572,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -498,7 +607,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -519,6 +628,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -574,6 +699,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -584,7 +739,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -604,8 +759,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key @@ -648,6 +809,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) 
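The same pattern reaches plans that previously had no reduce work at all: the map-only sorted group-by above acquires an Edges section whose sole reducer (Reducer 2 <- Map 1 (GROUP, 1)) exists only to finish the compute_stats aggregation. Once the Stats Work stage publishes the result, the table's COLUMN_STATS_ACCURATE parameter carries a per-column map alongside BASIC_STATS, as the property diffs throughout these files show; validity is thus recorded per column rather than as a single table-wide flag. A hedged way to inspect that state (hypothetical session; assumes column-stats autogathering is enabled, commonly via hive.stats.column.autogather):

    -- Show the rewritten plan, including the stats-only reducer:
    EXPLAIN EXTENDED
    INSERT OVERWRITE TABLE outputTbl1
    SELECT key, count(1) FROM T1 GROUP BY key;

    -- After running the insert, Table Parameters include, e.g.:
    --   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
    DESCRIBE FORMATTED outputTbl1;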
#### A masked pattern was here #### Vertices: Map 1 @@ -681,7 +844,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -702,6 +865,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -757,6 +936,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -767,7 +976,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -787,8 +996,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k @@ -839,6 +1054,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -893,6 +1110,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) 
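
Throughout these rewritten plans the column statistics are gathered with a two-phase aggregation: each map task folds its split into per-column partial structs (Group By Operator with mode: hash), and a single new reducer merges the partials (mode: final, or mergepartial in the multi-stage variants) before the Stats Work stage persists them. A toy illustration of that partial/merge contract — class and field names are invented, and Hive's compute_stats structs carry more fields than this:

    import java.util.Arrays;

    // Toy stand-in for one column's stats partial; shows only why merging
    // per-split partials reproduces a single pass over the whole column.
    final class LongStatsPartial {
      long count, numNulls;
      long min = Long.MAX_VALUE, max = Long.MIN_VALUE;

      void update(Long v) {            // map side, mode: hash
        if (v == null) { numNulls++; return; }
        count++;
        min = Math.min(min, v);
        max = Math.max(max, v);
      }

      void merge(LongStatsPartial o) { // reduce side, mode: final
        count += o.count;
        numNulls += o.numNulls;
        min = Math.min(min, o.min);
        max = Math.max(max, o.max);
      }

      public static void main(String[] args) {
        LongStatsPartial split1 = new LongStatsPartial();
        LongStatsPartial split2 = new LongStatsPartial();
        Arrays.asList(1L, 2L, null).forEach(split1::update);
        Arrays.asList(7L, 8L).forEach(split2::update);
        split1.merge(split2);
        System.out.printf("count=%d nulls=%d min=%d max=%d%n",
            split1.count, split1.numNulls, split1.min, split1.max);
      }
    }

Because the merge is associative, the plan can run the hash phase on however many map tasks the query already needed and still funnel everything through one GROUP-by-nothing reducer, which is why each plan gains exactly one extra "Reducer N <- ... (GROUP, 1)" edge.
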
+ outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -948,6 +1181,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -978,8 +1241,14 @@ STAGE PLANS: name: default.outputtbl3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key @@ -1034,6 +1303,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1179,6 +1449,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1209,8 +1525,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val @@ -1259,6 +1581,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1383,7 +1706,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1404,6 +1727,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1414,7 +1783,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1434,8 +1803,14 @@ STAGE PLANS: name: default.outputtbl3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 @@ -1486,6 +1861,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1620,7 +1996,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1641,6 +2017,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + 
serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1651,7 +2073,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1671,8 +2093,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT cast(key + key as string), sum(cnt) from @@ -1727,6 +2155,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1760,7 +2190,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1781,6 +2211,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1836,113 +2282,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1] - Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [null-subquery2:$hdt$_0-subquery2:t1] + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1953,7 +2322,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1973,8 +2342,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( @@ -2005,14 +2380,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### 1 1 -1 1 2 1 -2 1 -3 1 3 1 7 1 -7 1 -8 2 8 2 PREHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 @@ -2039,8 +2409,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 5 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2074,7 +2445,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2082,19 +2453,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2150,7 +2537,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:t1] - Map 2 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -2230,7 +2617,37 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] - Reducer 3 + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### 
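
Each plan now terminates in a Stats Work stage whose Column Stats Desc names the columns, their types, and the destination table, with "Is Table Level Stats: true" because these targets are unpartitioned. A hedged sketch of what publishing such table-level column statistics through the thrift-generated metastore client can look like — the class and method names below exist in the metastore API, but the concrete numbers are made up, and in the real plans the stats task performs this conversion itself:

    import java.util.Collections;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
    import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

    public class PublishColumnStats {
      public static void main(String[] args) throws Exception {
        HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());

        // true <=> "Is Table Level Stats: true": the stats describe the whole
        // table rather than a single partition.
        ColumnStatisticsDesc desc =
            new ColumnStatisticsDesc(true, "default", "outputtbl1");

        // Illustrative values only; the real numbers come from compute_stats.
        LongColumnStatsData keyStats =
            new LongColumnStatsData(0 /* numNulls */, 5 /* numDVs */);
        keyStats.setLowValue(1);
        keyStats.setHighValue(8);
        ColumnStatisticsData data = new ColumnStatisticsData();
        data.setLongStats(keyStats);

        ColumnStatisticsObj obj = new ColumnStatisticsObj("key", "int", data);
        client.updateTableColumnStatistics(
            new ColumnStatistics(desc, Collections.singletonList(obj)));
        client.close();
      }
    }
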
+ NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2248,7 +2665,7 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false - Reducer 4 + Reducer 5 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2276,7 +2693,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2284,19 +2701,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Stage: Stage-0 Move Operator @@ -2307,7 +2740,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2315,20 +2748,26 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + 
Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( @@ -2393,7 +2832,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2476,7 +2916,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: t1 @@ -2582,7 +3022,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2603,6 +3043,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2613,7 +3099,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2633,8 +3119,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT 
OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM @@ -2971,6 +3463,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3008,7 +3501,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3031,7 +3524,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3096,7 +3589,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3117,6 +3610,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3127,7 +3666,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 
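
The aggregation calls read compute_stats(col, 16); the trailing literal is the estimator width — the number of bit vectors the distinct-value sketch keeps, a Flajolet–Martin-style estimator in this era of Hive. A toy version, assuming nothing from Hive itself, to show why averaging over 16 bit vectors steadies the NDV estimate:

    import java.util.stream.IntStream;

    // Toy Flajolet-Martin NDV sketch with 16 bit vectors, the shape hinted at
    // by compute_stats(col, 16). Hive's estimator differs in hashing and bias
    // correction; this only demonstrates the averaging idea.
    public class FmNdvSketch {
      private static final double PHI = 0.77351; // FM bias-correction constant
      private final long[] bitVectors = new long[16];

      void add(long value) {
        for (int i = 0; i < bitVectors.length; i++) {
          long h = hash(value, i);
          bitVectors[i] |= Long.lowestOneBit(h); // record lowest set bit of h
        }
      }

      double estimate() {
        double sumIndex = 0;
        for (long bv : bitVectors) {
          sumIndex += Long.numberOfTrailingZeros(~bv); // first unset bit
        }
        return Math.pow(2, sumIndex / bitVectors.length) / PHI;
      }

      private static long hash(long v, int seed) {
        long h = v + 0x9E3779B97F4A7C15L * (seed + 1); // splitmix64 finalizer
        h = (h ^ (h >>> 30)) * 0xBF58476D1CE4E5B9L;
        h = (h ^ (h >>> 27)) * 0x94D049BB133111EBL;
        h ^= (h >>> 31);
        return h == 0 ? 1 : h;
      }

      public static void main(String[] args) {
        FmNdvSketch sketch = new FmNdvSketch();
        IntStream.range(0, 1000).forEach(i -> sketch.add(i % 200)); // 200 distinct
        System.out.printf("estimated NDV ~ %.0f%n", sketch.estimate());
      }
    }

A single bit vector yields only power-of-two estimates; averaging the first-zero positions across the 16 vectors before exponentiating is what keeps the numDVs figure usable for the optimizer.
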
column.name.delimiter , columns key,cnt @@ -3147,8 +3686,14 @@ STAGE PLANS: name: default.outputtbl1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key @@ -3191,6 +3736,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3224,7 +3771,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3245,6 +3792,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3254,7 +3817,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3277,7 +3840,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3300,6 +3863,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3310,7 +3903,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3330,8 +3923,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val @@ -3385,6 +3984,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3439,6 +4040,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3448,7 +4065,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3471,7 +4088,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3494,6 +4111,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3524,8 +4171,14 @@ STAGE PLANS: name: default.outputtbl5 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 @@ -3578,6 +4231,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3611,7 +4266,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3632,6 +4287,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3641,7 +4312,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3664,7 +4335,7 @@ STAGE PLANS: input 
format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3687,6 +4358,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3697,7 +4398,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3717,8 +4418,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from @@ -3778,6 +4485,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3811,7 +4520,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3832,6 +4541,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, 
_col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3841,7 +4566,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3864,7 +4589,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3887,6 +4612,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3897,7 +4652,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3917,8 +4672,14 @@ STAGE PLANS: name: default.outputtbl4 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from @@ -3995,11 +4756,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 
4 <- Reducer 3 (GROUP, 1)
+        Reducer 5 <- Map 7 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
-        Map 1
+        Map 6
            Map Operator Tree:
                TableScan
                  alias: t2
@@ -4043,6 +4806,38 @@ STAGE PLANS:
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.dest2
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), val (type: string)
+                    outputColumnNames: key, val
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(1)
+                      keys: key (type: string), val (type: string)
+                      mode: final
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
+                          outputColumnNames: key, val, cnt
+                          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1, _col2
+                            Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
        Reducer 2
            Reduce Operator Tree:
              Group By Operator
@@ -4077,6 +4872,47 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest1
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: key, cnt
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order:
+                        Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 4
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: final
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: true
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: true
+                  Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -4089,7 +4925,12 @@
              name: default.dest1
  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.dest1
  Stage: Stage-1
    Move Operator
@@ -4102,7 +4943,12 @@
              name: default.dest2
  Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val, cnt
+          Column Types: int, string, int
+          Table: default.dest2
PREHOOK: query: FROM T2
INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
@@ -4171,11 +5017,13 @@ STAGE PLANS:
  Stage: Stage-2
    Spark
      Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+        Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31)
        Reducer 3 <- Reducer 2 (GROUP, 31)
+        Reducer 4 <- Reducer 3 (GROUP, 1)
+        Reducer 5 <- Map 7 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
-        Map 1
+        Map 6
            Map Operator Tree:
                TableScan
                  alias: t2
@@ -4222,6 +5070,41 @@ STAGE PLANS:
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.dest2
+        Map 7
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (UDFToDouble(key) = 8.0) (type: boolean)
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), val (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1)
+                        keys: _col0 (type: string), _col1 (type: string)
+                        mode: final
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
+                            outputColumnNames: key, val, cnt
+                            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                            Group By Operator
+                              aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16)
+                              mode: hash
+                              outputColumnNames: _col0, _col1, _col2
+                              Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE
+                              Reduce Output Operator
+                                sort order:
+                                Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE
+                                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
        Reducer 2
            Reduce Operator Tree:
              Group By Operator
@@ -4256,6 +5139,47 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest1
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: key, cnt
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order:
+                        Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 4
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: final
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: true
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: true
+                  Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -4268,7 +5192,12 @@
              name: default.dest1
  Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.dest1
  Stage: Stage-1
    Move Operator
@@ -4281,7 +5210,12 @@
              name: default.dest2
  Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, val, cnt
+          Column Types: int, string, int
+          Table: default.dest2
PREHOOK: query: FROM (select key, val from T2 where key = 8) x
INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
diff --git ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out
index 1efb81b35f..35b68914b5 100644
--- ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out
+++ ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out
@@ -42,7 +42,7 @@ Database: default
Table: test_table_bucketed
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 3
 numRows 309
 rawDataSize 1482
diff --git ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out
index 175ddd6a02..973c8d0c4c 100644
--- ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out
+++ ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out
@@ -40,7 +40,7 @@ Database: default
Table: test_table
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 numFiles 2
 numRows 1028
 rawDataSize 10968
@@ -91,7 +91,7 @@ Database: default
Table: test_table
#### A masked
pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 diff --git ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out index 13219ac56d..191a642e44 100644 --- ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out +++ ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out @@ -32,6 +32,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -62,6 +63,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-04-08' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-04-08' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -77,7 +113,12 @@ STAGE PLANS: name: default.test_table Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key2, value, cast(hr as int) FROM @@ -129,7 +170,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 670 rawDataSize 5888 @@ -169,7 +210,7 @@ Database: default Table: test_table #### A masked pattern was here #### 
Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 330 rawDataSize 2924 diff --git ql/src/test/results/clientpositive/spark/innerjoin.q.out ql/src/test/results/clientpositive/spark/innerjoin.q.out index 9328b99b0f..d4db94f183 100644 --- ql/src/test/results/clientpositive/spark/innerjoin.q.out +++ ql/src/test/results/clientpositive/spark/innerjoin.q.out @@ -23,7 +23,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +44,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -83,6 +84,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,7 +123,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/spark/input12.q.out ql/src/test/results/clientpositive/spark/input12.q.out index 2efd81b86a..84d8b42ba2 100644 --- ql/src/test/results/clientpositive/spark/input12.q.out +++ ql/src/test/results/clientpositive/spark/input12.q.out @@ -46,9 +46,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -68,6 +72,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + 
Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +115,106 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key >= 100) and (key < 200)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key >= 200) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -110,7 +227,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -123,7 +245,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 Stage: Stage-2 Move Operator @@ -139,7 +266,12 @@ STAGE PLANS: name: default.dest3 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 diff --git ql/src/test/results/clientpositive/spark/input13.q.out ql/src/test/results/clientpositive/spark/input13.q.out index 09c7959bfb..87b8b1bb3b 100644 --- ql/src/test/results/clientpositive/spark/input13.q.out +++ ql/src/test/results/clientpositive/spark/input13.q.out @@ -39,19 +39,23 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-3 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-3 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-2, Stage-3 Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -71,6 +75,19 
@@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -115,6 +132,106 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key >= 100) and (key < 200)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key >= 200) and (key < 300)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -127,7 +244,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -140,7 +262,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 Stage: Stage-2 Move Operator @@ -156,7 +283,12 @@ STAGE PLANS: name: default.dest3 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 Stage: Stage-3 Move Operator diff --git ql/src/test/results/clientpositive/spark/input14.q.out ql/src/test/results/clientpositive/spark/input14.q.out index 36f162eb90..c24c21b38f 100644 --- ql/src/test/results/clientpositive/spark/input14.q.out +++ ql/src/test/results/clientpositive/spark/input14.q.out @@ -34,6 +34,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +76,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -87,7 +115,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM src diff --git ql/src/test/results/clientpositive/spark/input17.q.out ql/src/test/results/clientpositive/spark/input17.q.out index d95dbcb61a..c7c38846a1 100644 --- ql/src/test/results/clientpositive/spark/input17.q.out +++ ql/src/test/results/clientpositive/spark/input17.q.out @@ -34,6 +34,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +73,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -84,7 +112,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM src_thrift diff --git ql/src/test/results/clientpositive/spark/input18.q.out ql/src/test/results/clientpositive/spark/input18.q.out index 65850b2ef5..728a5d3be0 100644 --- 
ql/src/test/results/clientpositive/spark/input18.q.out +++ ql/src/test/results/clientpositive/spark/input18.q.out @@ -34,6 +34,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +76,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -87,7 +115,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM src diff --git ql/src/test/results/clientpositive/spark/input1_limit.q.out ql/src/test/results/clientpositive/spark/input1_limit.q.out index dd49287a2c..7ff5067bad 100644 --- ql/src/test/results/clientpositive/spark/input1_limit.q.out +++ ql/src/test/results/clientpositive/spark/input1_limit.q.out @@ -35,11 +35,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 1) - Reducer 3 <- Map 5 (GROUP, 1) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -57,9 +59,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -77,7 +78,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -100,8 +100,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 @@ -121,6 +148,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -133,7 +187,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -146,7 +205,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 diff --git ql/src/test/results/clientpositive/spark/input_part2.q.out ql/src/test/results/clientpositive/spark/input_part2.q.out index e1582e2875..1925ad9956 100644 --- ql/src/test/results/clientpositive/spark/input_part2.q.out +++ ql/src/test/results/clientpositive/spark/input_part2.q.out @@ -34,9 +34,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) 
#### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -82,6 +85,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-08' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) @@ -224,6 +243,198 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(hr, 'hll'), compute_stats(ds, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + 
serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -254,8 +465,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true Stage: Stage-1 Move Operator @@ -286,8 +503,14 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest2 + Is Table Level Stats: true PREHOOK: query: FROM srcpart INSERT OVERWRITE TABLE dest1 SELECT srcpart.key, srcpart.value, srcpart.hr, srcpart.ds WHERE srcpart.key < 100 and srcpart.ds = '2008-04-08' and srcpart.hr = '12' diff --git ql/src/test/results/clientpositive/spark/insert_into1.q.out ql/src/test/results/clientpositive/spark/insert_into1.q.out index dff389db9c..645d9aa92e 100644 --- ql/src/test/results/clientpositive/spark/insert_into1.q.out +++ ql/src/test/results/clientpositive/spark/insert_into1.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,7 +40,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -62,6 +62,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + 
mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -74,7 +101,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ -138,6 +170,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -153,7 +186,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -176,6 +208,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -188,7 +247,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY @@ -252,6 +316,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 
<- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -267,7 +332,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -290,6 +354,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -302,7 +393,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY @@ -364,6 +460,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -384,6 +482,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -396,7 +521,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: insert overwrite table insert_into1 select 1, 'a' PREHOOK: type: QUERY @@ -420,6 +550,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -440,6 +572,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -452,7 +611,12 @@ STAGE PLANS: name: default.insert_into1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 PREHOOK: query: insert into insert_into1 select 2, 'b' PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/insert_into2.q.out ql/src/test/results/clientpositive/spark/insert_into2.q.out index 329387dd91..49ee4fcb80 100644 --- ql/src/test/results/clientpositive/spark/insert_into2.q.out +++ ql/src/test/results/clientpositive/spark/insert_into2.q.out @@ -28,6 +28,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +44,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -66,6 +66,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 
'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -80,7 +115,12 @@ STAGE PLANS: name: default.insert_into2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY @@ -183,6 +223,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -198,7 +239,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -221,6 +261,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, 
_col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -235,7 +310,12 @@ STAGE PLANS: name: default.insert_into2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 @@ -307,6 +387,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -322,7 +403,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -345,6 +425,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -359,7 +474,12 @@ STAGE PLANS: name: default.insert_into2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 diff --git ql/src/test/results/clientpositive/spark/insert_into3.q.out ql/src/test/results/clientpositive/spark/insert_into3.q.out index 
a6fac2336b..5f08ac502b 100644 --- ql/src/test/results/clientpositive/spark/insert_into3.q.out +++ ql/src/test/results/clientpositive/spark/insert_into3.q.out @@ -39,11 +39,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (SORT, 1) - Reducer 3 <- Map 5 (SORT, 1) + Reducer 2 <- Map 6 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -56,21 +58,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: Select Operator @@ -92,8 +79,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 @@ -113,6 +127,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data 
size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -125,7 +166,12 @@ STAGE PLANS: name: default.insert_into3a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a Stage: Stage-1 Move Operator @@ -138,7 +184,12 @@ STAGE PLANS: name: default.insert_into3b Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b PREHOOK: query: FROM src INSERT INTO TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50 INSERT INTO TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100 @@ -199,11 +250,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 1) - Reducer 3 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -218,7 +271,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -241,8 +293,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) 
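The change that repeats across all of these golden files is the column-stats autogather rewrite: every INSERT plan gains a Select Operator projecting the written columns, a hash-mode Group By over compute_stats(col, 'hll') (keyed by the partition column when the target is partitioned), a mergepartial Group By on a dedicated reducer, and a Stats Work stage whose Column Stats Desc persists the merged result. A minimal way to reproduce one of these plans from a Hive session is sketched below; the session commands and the table name stats_demo are illustrative, not part of this patch.

    -- sketch, assuming a build where hive.stats.column.autogather is available
    SET hive.stats.column.autogather=true;
    CREATE TABLE stats_demo (key INT, value STRING);
    EXPLAIN INSERT OVERWRITE TABLE stats_demo SELECT 1, 'a';
    -- expected: the plan ends in a Stats Work stage whose Column Stats Desc
    -- lists key, value with types int, string, matching the + hunks above
    DESCRIBE FORMATTED stats_demo key;  -- after running the insert, shows the gathered column stats

The 'hll' argument selects HyperLogLog as the NDV estimator, which is why the Reduce Output Operators in these hunks ship struct-typed partial sketches that the mergepartial Group By can combine.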
outputColumnNames: _col0, _col1 @@ -262,6 +341,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -274,7 +380,12 @@ STAGE PLANS: name: default.insert_into3a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a Stage: Stage-1 Move Operator @@ -287,7 +398,12 @@ STAGE PLANS: name: default.insert_into3b Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b PREHOOK: query: FROM src INSERT OVERWRITE TABLE insert_into3a SELECT * LIMIT 10 INSERT INTO TABLE insert_into3b SELECT * LIMIT 10 diff --git ql/src/test/results/clientpositive/spark/join1.q.out ql/src/test/results/clientpositive/spark/join1.q.out index a0ee4ea5b8..52ca46acb2 100644 --- ql/src/test/results/clientpositive/spark/join1.q.out +++ ql/src/test/results/clientpositive/spark/join1.q.out @@ -23,7 +23,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +44,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -83,6 +84,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 
864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,7 +123,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/spark/join14.q.out ql/src/test/results/clientpositive/spark/join14.q.out index e804a1d9a5..98b5f1d086 100644 --- ql/src/test/results/clientpositive/spark/join14.q.out +++ ql/src/test/results/clientpositive/spark/join14.q.out @@ -23,7 +23,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +44,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -83,6 +84,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,7 +123,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 PREHOOK: query: FROM src JOIN srcpart ON 
src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value diff --git ql/src/test/results/clientpositive/spark/join17.q.out ql/src/test/results/clientpositive/spark/join17.q.out index 3644efdde1..ca3a91e411 100644 --- ql/src/test/results/clientpositive/spark/join17.q.out +++ ql/src/test/results/clientpositive/spark/join17.q.out @@ -23,7 +23,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -100,7 +101,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [$hdt$_0:src1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -221,6 +222,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 'hll'), compute_stats(value1, 'hll'), compute_stats(key2, 'hll'), compute_stats(value2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -251,8 +298,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* diff --git ql/src/test/results/clientpositive/spark/join2.q.out ql/src/test/results/clientpositive/spark/join2.q.out index f684beb7b9..d2d6b1d992 
100644 --- ql/src/test/results/clientpositive/spark/join2.q.out +++ ql/src/test/results/clientpositive/spark/join2.q.out @@ -129,7 +129,8 @@ STAGE PLANS: name: default.dest_j2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value diff --git ql/src/test/results/clientpositive/spark/join25.q.out ql/src/test/results/clientpositive/spark/join25.q.out index 05e5e701f9..9ca88fa2fd 100644 --- ql/src/test/results/clientpositive/spark/join25.q.out +++ ql/src/test/results/clientpositive/spark/join25.q.out @@ -48,6 +48,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -84,8 +86,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,7 +127,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value diff --git ql/src/test/results/clientpositive/spark/join26.q.out ql/src/test/results/clientpositive/spark/join26.q.out index ccd4526707..8dbf530899 100644 --- ql/src/test/results/clientpositive/spark/join26.q.out +++ ql/src/test/results/clientpositive/spark/join26.q.out @@ -29,7 +29,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -102,7 +102,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [$hdt$_2:x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: z @@ -180,6 +180,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -206,8 +208,8 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col4 input vertices: - 
1 Map 2 - 2 Map 3 + 1 Map 3 + 2 Map 4 Position of Big Table: 0 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -246,6 +248,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -299,6 +317,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [$hdt$_1:y] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -329,8 +377,14 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/spark/join27.q.out ql/src/test/results/clientpositive/spark/join27.q.out index e10d2fb04a..95c4432680 100644 --- ql/src/test/results/clientpositive/spark/join27.q.out +++ ql/src/test/results/clientpositive/spark/join27.q.out @@ -48,6 +48,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -84,8 +86,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,7 +127,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value diff --git ql/src/test/results/clientpositive/spark/join28.q.out ql/src/test/results/clientpositive/spark/join28.q.out index 8d4d870f89..e18c4f3571 100644 --- ql/src/test/results/clientpositive/spark/join28.q.out +++ ql/src/test/results/clientpositive/spark/join28.q.out @@ -51,7 +51,7 @@ STAGE PLANS: 1 _col0 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -72,6 +72,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -94,7 +96,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) @@ -122,8 +124,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -136,7 +165,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value diff --git ql/src/test/results/clientpositive/spark/join29.q.out ql/src/test/results/clientpositive/spark/join29.q.out index 88929fe2c9..9f60c2f2a0 100644 --- ql/src/test/results/clientpositive/spark/join29.q.out +++ ql/src/test/results/clientpositive/spark/join29.q.out @@ -70,6 +70,7 @@ STAGE PLANS: Spark Edges: Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -124,6 +125,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt1, 'hll'), compute_stats(cnt2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -136,7 +164,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt1, cnt2 + Column Types: string, int, int + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, subq1.cnt, subq2.cnt diff --git ql/src/test/results/clientpositive/spark/join3.q.out ql/src/test/results/clientpositive/spark/join3.q.out index e50f091277..47b17fa71c 100644 --- ql/src/test/results/clientpositive/spark/join3.q.out +++ ql/src/test/results/clientpositive/spark/join3.q.out @@ -23,7 +23,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +44,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - 
Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -60,7 +61,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 5 Map Operator Tree: TableScan alias: src3 @@ -102,6 +103,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -114,7 +142,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value diff --git ql/src/test/results/clientpositive/spark/join30.q.out ql/src/test/results/clientpositive/spark/join30.q.out index 23650ffe1b..ba1a82bd48 100644 --- ql/src/test/results/clientpositive/spark/join30.q.out +++ ql/src/test/results/clientpositive/spark/join30.q.out @@ -48,6 +48,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -106,6 +107,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -118,7 +146,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key diff --git ql/src/test/results/clientpositive/spark/join31.q.out ql/src/test/results/clientpositive/spark/join31.q.out index 4edb4ef976..ddf8e83ce8 100644 --- ql/src/test/results/clientpositive/spark/join31.q.out +++ ql/src/test/results/clientpositive/spark/join31.q.out @@ -31,7 +31,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 5 Map Operator Tree: TableScan alias: y @@ -60,6 +60,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -97,7 +98,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -131,6 +132,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -143,7 +171,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt diff --git ql/src/test/results/clientpositive/spark/join34.q.out ql/src/test/results/clientpositive/spark/join34.q.out index 503235f607..5afd665859 100644 --- ql/src/test/results/clientpositive/spark/join34.q.out +++ ql/src/test/results/clientpositive/spark/join34.q.out @@ -35,7 
+35,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -112,7 +113,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [$hdt$_0-subquery1:$hdt$_0-subquery1:x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: x1 @@ -186,7 +187,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [$hdt$_0-subquery2:$hdt$_0-subquery2:x1] - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -307,6 +308,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -337,8 +384,14 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value diff --git ql/src/test/results/clientpositive/spark/join35.q.out ql/src/test/results/clientpositive/spark/join35.q.out index 54f68af58a..6403550631 100644 --- ql/src/test/results/clientpositive/spark/join35.q.out +++ ql/src/test/results/clientpositive/spark/join35.q.out @@ -36,8 +36,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - 
Reducer 5 <- Map 4 (GROUP, 2) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -116,7 +117,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [$hdt$_0-subquery1:$hdt$_0-subquery1:x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: x1 @@ -192,7 +193,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [$hdt$_0-subquery2:$hdt$_0-subquery2:x1] - Map 6 + Map 7 Map Operator Tree: TableScan alias: x @@ -331,7 +332,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 6 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -379,8 +426,14 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, int + Table: default.dest_j1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.cnt diff --git ql/src/test/results/clientpositive/spark/join36.q.out ql/src/test/results/clientpositive/spark/join36.q.out index b1717e02e3..d9d17d9803 100644 --- ql/src/test/results/clientpositive/spark/join36.q.out +++ ql/src/test/results/clientpositive/spark/join36.q.out @@ -67,7 +67,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: y @@ -88,6 +88,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked 
pattern was here #### Vertices: Map 1 @@ -110,7 +112,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) @@ -124,8 +126,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -138,7 +167,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, int, int + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt diff --git ql/src/test/results/clientpositive/spark/join37.q.out ql/src/test/results/clientpositive/spark/join37.q.out index 327e93ecd2..c5ec23d794 100644 --- ql/src/test/results/clientpositive/spark/join37.q.out +++ ql/src/test/results/clientpositive/spark/join37.q.out @@ -48,6 +48,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -84,8 +86,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1304 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + 
Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,7 +127,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value diff --git ql/src/test/results/clientpositive/spark/join39.q.out ql/src/test/results/clientpositive/spark/join39.q.out index 8f0ba62f2b..83170ead4d 100644 --- ql/src/test/results/clientpositive/spark/join39.q.out +++ ql/src/test/results/clientpositive/spark/join39.q.out @@ -27,7 +27,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src @@ -48,6 +48,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,7 +69,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -77,8 +79,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, key1, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(key1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,7 +120,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - 
Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value, key1, val2 + Column Types: string, string, string, string + Table: default.dest_j1 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value diff --git ql/src/test/results/clientpositive/spark/join4.q.out ql/src/test/results/clientpositive/spark/join4.q.out index ae91a49789..3508c7178c 100644 --- ql/src/test/results/clientpositive/spark/join4.q.out +++ ql/src/test/results/clientpositive/spark/join4.q.out @@ -45,7 +45,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +67,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +107,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -118,7 +146,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/join5.q.out ql/src/test/results/clientpositive/spark/join5.q.out index d79af77f9e..06cb5485ef 100644 --- ql/src/test/results/clientpositive/spark/join5.q.out +++ ql/src/test/results/clientpositive/spark/join5.q.out @@ -45,7 +45,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern 
was here #### Vertices: Map 1 @@ -66,7 +67,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +107,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -118,7 +146,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/join6.q.out ql/src/test/results/clientpositive/spark/join6.q.out index 8ae5e3af59..edb9055217 100644 --- ql/src/test/results/clientpositive/spark/join6.q.out +++ ql/src/test/results/clientpositive/spark/join6.q.out @@ -45,7 +45,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +67,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +107,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), 
compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -118,7 +146,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/join7.q.out ql/src/test/results/clientpositive/spark/join7.q.out index 45c42319c3..fd474bf009 100644 --- ql/src/test/results/clientpositive/spark/join7.q.out +++ ql/src/test/results/clientpositive/spark/join7.q.out @@ -55,7 +55,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -76,7 +77,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -94,7 +95,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src3 @@ -136,6 +137,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 
(type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -148,7 +176,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/join8.q.out ql/src/test/results/clientpositive/spark/join8.q.out index 4ddad76bda..89447c35db 100644 --- ql/src/test/results/clientpositive/spark/join8.q.out +++ ql/src/test/results/clientpositive/spark/join8.q.out @@ -45,7 +45,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +67,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -109,6 +110,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: 
Stage-0 Move Operator @@ -121,7 +149,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 PREHOOK: query: FROM ( FROM diff --git ql/src/test/results/clientpositive/spark/join9.q.out ql/src/test/results/clientpositive/spark/join9.q.out index c7a191a4c0..fd0b93d572 100644 --- ql/src/test/results/clientpositive/spark/join9.q.out +++ ql/src/test/results/clientpositive/spark/join9.q.out @@ -23,7 +23,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -101,7 +102,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [$hdt$_0:src1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -222,6 +223,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -252,8 +299,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' diff --git ql/src/test/results/clientpositive/spark/join_map_ppr.q.out ql/src/test/results/clientpositive/spark/join_map_ppr.q.out index 86f3d9a8c4..efdcc3f6bc 100644 --- 
ql/src/test/results/clientpositive/spark/join_map_ppr.q.out +++ ql/src/test/results/clientpositive/spark/join_map_ppr.q.out @@ -172,6 +172,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -234,6 +236,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -289,6 +307,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -319,8 +367,14 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value @@ -550,7 +604,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -571,7 +625,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns 
key,value @@ -619,7 +673,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -640,7 +694,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -664,6 +718,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -705,7 +761,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -726,6 +782,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -781,6 +853,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -791,7 +893,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -811,8 +913,14 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out index cdd5c661d7..9d1aeff01c 100644 --- ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out +++ ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -123,6 +144,41 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,8 +209,14 @@ STAGE PLANS: name: default.list_bucketing_static_part Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src @@ -198,7 +260,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out index 08df5d3772..37472917ea 100644 --- ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out +++ ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -174,6 +195,41 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: 
NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,8 +260,14 @@ STAGE PLANS: name: default.list_bucketing_static_part Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' @@ -253,7 +315,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -314,7 +376,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out index 5bf7f2884e..067b21a586 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out @@ -62,9 +62,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 2) + Reducer 3 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -84,6 +87,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -99,6 +118,72 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ds > '2008-04-08') (type: boolean) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -114,7 +199,12 @@ STAGE PLANS: name: default.nzhang_part1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column 
Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 Stage: Stage-1 Move Operator @@ -130,7 +220,12 @@ STAGE PLANS: name: default.nzhang_part2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out index 5ec6d68ea3..6d4e243fee 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -69,6 +71,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part10 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -84,7 +121,12 @@ STAGE PLANS: name: default.nzhang_part10 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part10 PREHOOK: query: from srcpart insert overwrite table nzhang_part10 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out index 7f403d575d..6867629b57 100644 --- 
ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out @@ -64,6 +64,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2), Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -86,7 +88,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 - Map 2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Map 3 Map Operator Tree: TableScan alias: src @@ -106,6 +124,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -121,7 +174,12 @@ STAGE PLANS: name: default.nzhang_part13 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + 
Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part13 PREHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr) select * from ( diff --git ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out index ce65e718d9..ac2397c9bf 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out @@ -53,8 +53,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) - Reducer 6 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 2), Reducer 5 (GROUP, 2), Reducer 7 (GROUP, 2) + Reducer 5 <- Map 1 (GROUP, 1) + Reducer 7 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -70,7 +71,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: Limit @@ -88,7 +88,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Limit Number of rows: 2 @@ -105,7 +140,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Limit Number of rows: 2 @@ -122,6 +173,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Stage: Stage-0 Move Operator @@ -136,7 +203,12 @@ STAGE PLANS: name: default.nzhang_part14 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part14 PREHOOK: query: insert overwrite table nzhang_part14 partition(value) select key, value from ( diff --git ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out index d066b3ae11..21f820a1b4 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out @@ -44,6 +44,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 10) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -74,6 +75,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,7 +125,12 @@ STAGE PLANS: name: default.nzhang_part_bucket Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out index d120963697..eb715af012 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out @@ -48,6 +48,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +69,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -82,7 +119,12 @@ STAGE PLANS: name: default.nzhang_part3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: 
default.nzhang_part3 PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out index 7ec76b5c77..4052442b0a 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out @@ -58,6 +58,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -77,6 +79,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -92,7 +129,12 @@ STAGE PLANS: name: default.nzhang_part4 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part4 PREHOOK: query: insert overwrite table nzhang_part4 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out index b132a591d4..cea818b822 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out @@ -35,6 +35,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ 
-54,6 +56,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -68,7 +105,12 @@ STAGE PLANS: name: default.nzhang_part5 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part5 PREHOOK: query: insert overwrite table nzhang_part5 partition (value) select key, value from src PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out index e19a986952..fbf1221a6b 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out @@ -54,9 +54,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 2) + Reducer 3 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -99,6 +102,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 
(type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -337,6 +359,309 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (ds > '2008-04-08') (type: boolean) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data 
size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -367,7 +692,8 @@ STAGE PLANS: name: default.nzhang_part8 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### Stage: Stage-1 @@ -399,8 +725,14 @@ STAGE PLANS: name: default.nzhang_part8 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false PREHOOK: query: from srcpart insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out index 55bcfae4fc..95a1410c78 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -69,6 +71,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -84,7 +121,12 @@ STAGE PLANS: name: default.nzhang_part9 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part9 PREHOOK: query: from srcpart insert overwrite table nzhang_part9 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git ql/src/test/results/clientpositive/spark/mapreduce1.q.out ql/src/test/results/clientpositive/spark/mapreduce1.q.out index d75b482871..eafea06891 100644 --- ql/src/test/results/clientpositive/spark/mapreduce1.q.out +++ ql/src/test/results/clientpositive/spark/mapreduce1.q.out @@ -32,6 +32,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -70,6 +71,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -82,7 +110,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/spark/mapreduce2.q.out ql/src/test/results/clientpositive/spark/mapreduce2.q.out index adfb503aa7..ee6f5a2acf 100644 --- ql/src/test/results/clientpositive/spark/mapreduce2.q.out +++ ql/src/test/results/clientpositive/spark/mapreduce2.q.out @@ -30,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + 
Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +68,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,7 +107,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 diff --git ql/src/test/results/clientpositive/spark/merge1.q.out ql/src/test/results/clientpositive/spark/merge1.q.out index 8e671e952c..d259f689ce 100644 --- ql/src/test/results/clientpositive/spark/merge1.q.out +++ ql/src/test/results/clientpositive/spark/merge1.q.out @@ -29,6 +29,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +73,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 
880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -93,7 +121,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.dest1 Stage: Stage-3 Spark @@ -523,6 +556,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -542,6 +577,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -563,7 +625,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 Stage: Stage-3 Spark @@ -633,6 +700,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -652,6 +721,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -673,7 +769,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 Stage: Stage-3 Spark diff --git ql/src/test/results/clientpositive/spark/merge2.q.out ql/src/test/results/clientpositive/spark/merge2.q.out index 24116cb468..8e120a148d 100644 --- ql/src/test/results/clientpositive/spark/merge2.q.out +++ ql/src/test/results/clientpositive/spark/merge2.q.out @@ -29,6 +29,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +73,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -93,7 +121,12 @@ STAGE PLANS: name: default.test1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 Stage: Stage-3 Spark @@ -523,6 +556,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -542,6 +577,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -563,7 +625,12 @@ STAGE PLANS: name: default.test1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 Stage: Stage-3 Spark @@ -633,6 +700,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -652,6 +721,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -673,7 +769,12 @@ STAGE PLANS: name: default.test1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 Stage: Stage-3 Spark diff --git ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out index 2891c7df23..34647d41a6 100644 --- ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out +++ ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out @@ -183,56 +183,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), 
count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: int), _col8 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col0 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: int), _col8 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -243,56 +199,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: int), _col8 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), 
count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col0 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: int), _col8 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -303,56 +215,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: int), _col8 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col0 (type: bigint), _col3 (type: bigint), 7 (type: decimal(2,0)), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: int), _col8 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, 
_col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -363,96 +231,66 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: int), _col8 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col0 (type: bigint), _col3 (type: bigint), 7 (type: decimal(2,0)), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: int), _col8 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: 
type: QUERY PREHOOK: Input: default@stats_tbl +PREHOOK: Output: default@stats_tbl #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl +POSTHOOK: Output: default@stats_tbl #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: default@stats_tbl_part@dt=2010 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt='2010') compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2010 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: default@stats_tbl_part@dt=2011 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2011 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt='2011') compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2011 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2011 #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt='2012') compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: default@stats_tbl_part@dt=2012 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt='2012') compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2012 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2012 #### A masked pattern was here #### PREHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si) from stats_tbl diff --git ql/src/test/results/clientpositive/spark/metadata_only_queries_with_filters.q.out ql/src/test/results/clientpositive/spark/metadata_only_queries_with_filters.q.out index 6376aa79f8..79d9d27203 100644 --- ql/src/test/results/clientpositive/spark/metadata_only_queries_with_filters.q.out +++ ql/src/test/results/clientpositive/spark/metadata_only_queries_with_filters.q.out @@ -126,21 +126,29 @@ PREHOOK: query: analyze table stats_tbl_part partition(dt=2010) compute statisti PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: default@stats_tbl_part@dt=2010 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt=2010) compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2010 
+POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2010 #### A masked pattern was here #### PREHOOK: query: analyze table stats_tbl_part partition(dt=2014) compute statistics for columns t,si,i,b,f,d,bo,s,bin PREHOOK: type: QUERY PREHOOK: Input: default@stats_tbl_part PREHOOK: Input: default@stats_tbl_part@dt=2014 +PREHOOK: Output: default@stats_tbl_part +PREHOOK: Output: default@stats_tbl_part@dt=2014 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_tbl_part partition(dt=2014) compute statistics for columns t,si,i,b,f,d,bo,s,bin POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_tbl_part POSTHOOK: Input: default@stats_tbl_part@dt=2014 +POSTHOOK: Output: default@stats_tbl_part +POSTHOOK: Output: default@stats_tbl_part@dt=2014 #### A masked pattern was here #### PREHOOK: query: explain select count(*), count(1), sum(1), count(s), count(bo), count(bin), count(si), max(i), min(b), max(f), min(d) from stats_tbl_part where dt = 2010 diff --git ql/src/test/results/clientpositive/spark/multi_insert.q.out ql/src/test/results/clientpositive/spark/multi_insert.q.out index 33af962c97..9d507e00e3 100644 --- ql/src/test/results/clientpositive/spark/multi_insert.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert.q.out @@ -34,9 +34,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -56,6 +59,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -71,6 +87,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -83,7 +152,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -96,7 +170,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -171,9 +250,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -193,6 +275,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -208,6 +303,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) 
and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -220,7 +368,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -233,7 +386,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -308,9 +466,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -330,6 +491,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -345,6 +519,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -357,7 +584,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -370,7 +602,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -445,9 +682,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A 
masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -467,6 +707,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -482,6 +735,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -494,7 +800,12 @@ STAGE 
PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -507,7 +818,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -583,7 +899,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -599,7 +918,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -619,6 +966,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -635,6 +995,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + 
Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -647,7 +1032,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -660,7 +1050,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -729,7 +1124,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -745,7 +1143,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 
Basic stats: COMPLETE Column stats: NONE @@ -765,6 +1191,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -781,6 +1220,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -793,7 +1257,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -806,7 +1275,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -875,7 +1349,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -891,7 +1368,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -911,6 +1416,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -927,6 +1445,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -939,7 +1482,12 @@ STAGE PLANS: name: default.src_multi1 
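Every INSERT branch in the plans above picks up the same autogathered subplan: a Select over the branch's output columns, a hash-mode Group By computing compute_stats per column (the second argument selects the NDV estimator, 16 FM-sketch bit vectors in some of these files, the 'hll' HyperLogLog estimator in others), a keyless Reduce Output, and a single mergepartial reducer whose result the later Stats Work stage persists. A minimal sketch of the equivalent manual sequence, assuming the src, src_multi1 and src_multi2 tables from these tests:

    SET hive.stats.column.autogather=false;   -- suppose inline gathering is off
    FROM src
    INSERT OVERWRITE TABLE src_multi1 SELECT * WHERE key < 10
    INSERT OVERWRITE TABLE src_multi2 SELECT * WHERE key > 10 AND key < 20;
    -- Gather the same per-column statistics in a separate pass:
    ANALYZE TABLE src_multi1 COMPUTE STATISTICS FOR COLUMNS key, value;
    ANALYZE TABLE src_multi2 COMPUTE STATISTICS FOR COLUMNS key, value;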
Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -952,7 +1500,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -1021,7 +1574,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1037,7 +1593,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -1057,6 +1641,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -1073,6 +1670,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Reducer 6 
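Once a Stats Work stage runs, the merged compute_stats results and the basic row and size counts are persisted to the metastore for the table named in its Column Stats Desc. A minimal sketch of checking the outcome from the Hive shell, assuming the src_multi1 table from these tests:

    -- Table-level parameters, including numRows, totalSize and the COLUMN_STATS_ACCURATE marker:
    DESCRIBE FORMATTED src_multi1;
    -- Per-column statistics (distinct values, nulls, average/max length):
    DESCRIBE FORMATTED src_multi1 key;
    DESCRIBE FORMATTED src_multi1 value;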
+ Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -1085,7 +1707,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1098,7 +1725,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 group by key, value @@ -1166,9 +1798,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1188,6 +1823,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1199,7 +1847,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1209,27 +1857,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num 
rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1242,7 +1912,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1255,7 +1930,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1349,9 +2029,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1371,6 +2054,19 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1382,7 +2078,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1392,27 +2088,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: 
mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1425,7 +2143,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1438,7 +2161,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1532,9 +2260,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1554,6 +2285,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1565,7 +2309,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1575,27 +2319,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, 
value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1608,7 +2374,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -1621,7 +2392,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1715,9 +2491,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -1737,6 +2516,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -1748,7 +2540,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 - Map 2 + Map 6 Map Operator Tree: TableScan alias: src @@ -1758,27 +2550,49 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 < 10) (type: boolean) - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1791,7 +2605,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + 
Columns: key, value
+ Column Types: string, string
+ Table: default.src_multi1
Stage: Stage-1
Move Operator
@@ -1804,7 +2623,12 @@ STAGE PLANS:
name: default.src_multi2
Stage: Stage-4
- Stats-Aggr Operator
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.src_multi2
PREHOOK: query: from (select * from src union all select * from src) s
insert overwrite table src_multi1 select * where key < 10
diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
index d8c4b7fc6f..cf664e3f9f 100644
--- ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
+++ ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
@@ -37,7 +37,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+ Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -53,7 +56,35 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
Reduce Operator Tree:
Forward
Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE
@@ -78,6 +109,19 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.e1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: key, count
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
Filter Operator
predicate: (KEY._col0 > 500) (type: boolean)
Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
@@ -99,6 +143,36 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.e2
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (KEY._col0 > 500) (type: boolean)
+ Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: key, count
+ Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
Stage: Stage-0
Move Operator
@@ -111,7 +185,12 @@ STAGE PLANS:
name: default.e1
Stage: Stage-3
- Stats-Aggr Operator
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, count
+ Column Types: string, int
+ Table: default.e1
Stage: Stage-1
Move Operator
@@ -124,7 +203,12 @@ STAGE PLANS:
name: default.e2
Stage: Stage-4
- Stats-Aggr Operator
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, count
+ Column Types: string, int
+ Table: default.e2
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE e1
@@ -224,7 +308,10 @@ STAGE PLANS:
Stage: Stage-2
Spark
Edges:
- Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+ Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -241,7 +328,35 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: key (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
Reduce Operator Tree:
Forward
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -263,6 +378,19 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.e2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: key, count
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
Filter Operator
predicate: (KEY._col0 > 450) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -284,6 +412,36 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.e1
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (KEY._col0 > 450) (type: boolean)
+ Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int)
+ outputColumnNames: key, count
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 'hll'), compute_stats(count, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
Stage: Stage-1
Move Operator
@@ -296,7 +454,12 @@ STAGE PLANS:
name: default.e2
Stage: Stage-3
- Stats-Aggr Operator
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, count
+ Column Types: string, int
+ Table: default.e2
Stage: Stage-0
Move Operator
@@ -309,7 +472,12 @@ STAGE PLANS:
name: default.e1
Stage: Stage-4
- Stats-Aggr Operator
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, count
+ Column Types: string, int
+ Table: default.e1
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE e1
diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
index 3d67b1d261..813df76630 100644
--- ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
+++ ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
@@ -76,6 +76,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.e1
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: count
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(count, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Group By Operator
aggregations: percentile_approx(VALUE._col0, 0.5)
mode: complete
@@ -89,6 +109,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.e2
+ Select Operator
+ expressions: _col0 (type: double)
+ outputColumnNames: percentile
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(percentile, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -101,7 +141,12 @@ STAGE PLANS:
name: default.e1
Stage: Stage-3
- Stats-Aggr Operator
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: count
+ Column Types: int
+ Table: default.e1
Stage: Stage-1
Move Operator
@@ -114,7 +159,12 @@ STAGE PLANS:
name: default.e2
Stage: Stage-4
- Stats-Aggr Operator
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: percentile
+ Column Types: double
+ Table: default.e2
PREHOOK: query: FROM (select key, cast(key as double) as value from src order by key) a
INSERT OVERWRITE TABLE e1
diff --git ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
index 7b804daba3..836b29df63 100644
--- ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
+++ ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
@@ -54,9 +54,12 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-2
Spark
+ Edges:
+ Reducer 2 <- Map 4 (GROUP, 1)
+ Reducer 3 <- Map 5 (GROUP, 1)
#### A masked pattern was here ####
Vertices:
- Map 1
+ Map 4
Map Operator Tree:
TableScan
alias: src_10
@@ -82,6 +85,19 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.src_lv1
+ Select Operator
+ expressions: _col0 (type:
string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 @@ -104,6 +120,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Lateral View Forward Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -147,6 +176,92 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Map 5 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Lateral View Forward + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Lateral View Join Operator + outputColumnNames: _col0, _col5 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator + expressions: array((key + 3),(key + 4)) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + function name: explode + Lateral View Join Operator + outputColumnNames: _col0, _col5 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col0 (type: string), UDFToString(_col5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -159,7 +274,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -172,7 +292,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, C lateral view explode(array(key+1, key+2)) A as C @@ -269,11 +394,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src_10 @@ -321,7 +448,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Map 5 + Map 7 Map Operator Tree: TableScan alias: src_10 @@ -389,9 +516,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num 
rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -409,6 +563,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -421,7 +602,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -434,7 +620,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key @@ -515,11 +706,15 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 7 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Reducer 9 (GROUP, 1) + Reducer 6 <- Reducer 10 (GROUP, 
1) + Reducer 9 <- Map 8 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 7 Map Operator Tree: TableScan alias: src_10 @@ -567,7 +762,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Map 5 + Map 8 Map Operator Tree: TableScan alias: src_10 @@ -581,6 +776,36 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + Reducer 10 + Reduce Operator Tree: + Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -601,8 +826,63 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 
984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: Forward Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -626,6 +906,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -659,7 +952,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -672,7 +970,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 Stage: Stage-2 Move Operator @@ -685,7 +988,12 @@ STAGE PLANS: name: default.src_lv3 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(C) lateral view explode(array(key+1, key+2)) A as C group by key @@ -780,12 +1088,35 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT key) + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Map 8 Map Operator Tree: TableScan alias: src_10 @@ -831,7 +1162,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 9 Map Operator Tree: TableScan alias: src_10 @@ -877,26 +1208,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: src_10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT key) - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -917,9 +1228,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: double) 
mode: mergepartial @@ -937,7 +1275,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) @@ -957,6 +1322,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -969,7 +1361,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -982,7 +1379,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 Stage: Stage-2 Move Operator @@ -995,7 +1397,12 @@ STAGE PLANS: name: default.src_lv3 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + 
Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 PREHOOK: query: from src_10 insert overwrite table src_lv1 select C, sum(distinct key) lateral view explode(array(key+1, key+2)) A as C group by C @@ -1132,12 +1539,17 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 13 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 7 <- Reducer 12 (GROUP, 1) + Reducer 8 <- Reducer 13 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 Map Operator Tree: TableScan alias: src_10 @@ -1163,7 +1575,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: array((key + 1),(key + 2)) (type: array) + expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator @@ -1183,7 +1595,20 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 11 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 200) or (key > 200)) (type: boolean) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Map 9 Map Operator Tree: TableScan alias: src_10 @@ -1209,7 +1634,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: array((key + 3),(key + 4)) (type: array) + expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator @@ -1229,60 +1654,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: src_10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 200) or (key > 200)) (type: boolean) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string), key (type: string) - sort order: ++ - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 
(type: string), UDFToString(_col1) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToString(_col1) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 - Reducer 4 + Reducer 12 Reduce Operator Tree: Forward Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE @@ -1307,6 +1679,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -1328,6 +1713,158 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv4 + Reducer 13 + Reduce Operator Tree: + Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col1:0._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1340,7 +1877,12 @@ STAGE PLANS: name: default.src_lv1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 Stage: Stage-1 Move Operator @@ -1353,7 +1895,12 @@ STAGE PLANS: name: default.src_lv2 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 Stage: Stage-2 Move Operator @@ -1366,7 +1913,12 @@ STAGE PLANS: name: default.src_lv3 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 Stage: Stage-3 Move Operator @@ -1379,7 +1931,12 @@ STAGE PLANS: name: default.src_lv4 Stage: Stage-8 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv4 PREHOOK: query: from src_10 insert overwrite table src_lv1 select key, sum(distinct C) lateral view explode(array(key+1, key+2)) A as C group by key diff --git ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out index 2b28d5313e..3da1accb5d 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out 
@@ -47,13 +47,14 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 6 (GROUP, 2) + Reducer 2 <- Map 7 (GROUP, 2) Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Map 7 (GROUP, 2) + Reducer 4 <- Map 8 (GROUP, 2) Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 6 <- Map 9 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -89,7 +90,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi3 - Map 7 + Map 8 Map Operator Tree: TableScan alias: src @@ -110,6 +111,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map 9 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -137,6 +163,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Group By Operator @@ -164,6 +210,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -176,7 +256,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -189,7 +274,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 Stage: Stage-2 Move Operator @@ -202,7 +292,12 @@ STAGE PLANS: name: default.src_multi3 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi3 PREHOOK: query: from src insert overwrite table src_multi1 select key, count(1) group by key order by key diff --git ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out index 7f1d67b566..c4a670f12f 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out @@ -35,9 +35,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -57,6 +60,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -72,6 +88,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -87,7 +156,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -100,7 +174,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -176,9 +255,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator 
Tree: TableScan alias: src @@ -198,6 +280,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -213,6 +308,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -228,7 +376,12 @@ STAGE PLANS: name: default.src_multi1 Stage: Stage-4 - Stats-Aggr 
Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 Stage: Stage-1 Move Operator @@ -241,7 +394,12 @@ STAGE PLANS: name: default.src_multi2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 PREHOOK: query: from src insert overwrite table src_multi1 select * where key < 10 @@ -317,9 +475,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -339,6 +500,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -354,6 +528,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + 
Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-3
     Dependency Collection
@@ -369,7 +596,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -382,7 +614,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
@@ -458,9 +695,12 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -480,6 +720,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -495,6 +748,59 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-3
     Dependency Collection
@@ -510,7 +816,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -523,7 +834,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
@@ -600,7 +916,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -616,7 +935,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -636,6 +983,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                     Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -652,6 +1012,31 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-3
     Dependency Collection
@@ -667,7 +1052,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -680,7 +1070,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
@@ -750,7 +1145,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -766,7 +1164,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -786,6 +1212,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                     Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -802,6 +1241,31 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-3
     Dependency Collection
@@ -817,7 +1281,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -830,7 +1299,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
@@ -900,7 +1374,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -916,7 +1393,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -936,6 +1441,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                     Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -952,6 +1470,31 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-3
     Dependency Collection
@@ -967,7 +1510,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -980,7 +1528,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
@@ -1050,7 +1603,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1066,7 +1622,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -1086,6 +1670,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                     Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -1102,6 +1699,31 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-3
     Dependency Collection
@@ -1117,7 +1739,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -1130,7 +1757,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
@@ -1199,9 +1831,12 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1221,6 +1856,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1232,7 +1880,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
-        Map 2 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1242,27 +1890,49 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (_col0 < 10) (type: boolean)
-                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi1
-                  Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi2
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-3
     Dependency Collection
@@ -1278,7 +1948,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -1291,7 +1966,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from (select * from src union all select * from src) s
 insert overwrite table src_multi1 select * where key < 10
@@ -1386,9 +2066,12 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1408,6 +2091,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1419,7 +2115,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
-        Map 2 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1429,27 +2125,49 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (_col0 < 10) (type: boolean)
-                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi1
-                  Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi2
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-3
     Dependency Collection
@@ -1465,7 +2183,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -1478,7 +2201,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from (select * from src union all select * from src) s
 insert overwrite table src_multi1 select * where key < 10
@@ -1573,9 +2301,12 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1595,6 +2326,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1606,7 +2350,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
-        Map 2 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1616,27 +2360,49 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (_col0 < 10) (type: boolean)
-                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi1
-                  Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi2
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-3
     Dependency Collection
@@ -1652,7 +2418,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -1665,7 +2436,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from (select * from src union all select * from src) s
 insert overwrite table src_multi1 select * where key < 10
@@ -1760,9 +2536,12 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1782,6 +2561,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1793,7 +2585,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
-        Map 2 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1803,27 +2595,49 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (_col0 < 10) (type: boolean)
-                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi1
-                  Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi2
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-3
     Dependency Collection
@@ -1839,7 +2653,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -1852,7 +2671,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-5
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
 PREHOOK: query: from (select * from src union all select * from src) s
 insert overwrite table src_multi1 select * where key < 10
@@ -2793,11 +3617,13 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 6 (GROUP, 1)
+        Reducer 3 <- Map 7 (GROUP, 1)
+        Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -2817,6 +3643,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -2832,6 +3671,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -2842,6 +3711,34 @@ STAGE PLANS:
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
@@ -2873,7 +3770,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
+        Reducer 5 
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
@@ -2901,7 +3798,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-6
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -2914,7 +3816,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-7
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
   Stage: Stage-2
     Move Operator
@@ -3013,11 +3920,13 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 6 (GROUP, 1)
+        Reducer 3 <- Map 7 (GROUP, 1)
+        Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -3037,6 +3946,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -3052,6 +3974,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -3062,6 +4014,34 @@ STAGE PLANS:
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
@@ -3093,7 +4073,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
+        Reducer 5 
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
@@ -3121,7 +4101,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-6
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -3134,7 +4119,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-7
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
   Stage: Stage-2
     Move Operator
@@ -3233,11 +4223,13 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 6 (GROUP, 1)
+        Reducer 3 <- Map 7 (GROUP, 1)
+        Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -3257,6 +4249,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -3272,6 +4277,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -3282,6 +4317,34 @@ STAGE PLANS:
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
              Forward
                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
@@ -3313,7 +4376,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
+        Reducer 5 
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
@@ -3341,7 +4404,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-6
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -3354,7 +4422,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-7
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
   Stage: Stage-2
     Move Operator
@@ -3453,11 +4526,13 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 6 (GROUP, 1)
+        Reducer 3 <- Map 7 (GROUP, 1)
+        Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -3477,6 +4552,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -3492,6 +4580,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -3502,6 +4620,34 @@ STAGE PLANS:
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
              Forward
                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
@@ -3533,7 +4679,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
+        Reducer 5 
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
@@ -3561,7 +4707,12 @@ STAGE PLANS:
               name: default.src_multi1
   Stage: Stage-6
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
   Stage: Stage-1
     Move Operator
@@ -3574,7 +4725,12 @@ STAGE PLANS:
               name: default.src_multi2
   Stage: Stage-7
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
   Stage: Stage-2
     Move Operator
diff --git ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out
index a146a8e83b..741ed57c14 100644
--- ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out
+++ ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out
@@ -59,11 +59,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 4 (GROUP, 2)
-        Reducer 3 <- Map 5 (GROUP, 2)
+        Reducer 2 <- Map 6 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 4 <- Map 7 (GROUP, 2)
+        Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 4 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: tbl
@@ -84,7 +86,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: int)
                       Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                       value expressions: _col1 (type: bigint)
-        Map 5 
+        Map 7 
            Map Operator Tree:
                 TableScan
                   alias: tbl
@@ -125,9 +127,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest1
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: d1, d2
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: int), KEY._col1 (type: int)
                 mode: mergepartial
@@ -145,6 +174,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                    outputColumnNames: d1, d2, d3
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Move Operator
@@ -157,7 +213,12 @@ STAGE PLANS:
               name: default.dest1
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2
+          Column Types: int, int
+          Table: default.dest1
   Stage: Stage-1
     Move Operator
@@ -170,7 +231,12 @@ STAGE PLANS:
               name: default.dest2
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3
+          Column Types: int, int, int
+          Table: default.dest2
 PREHOOK: query: EXPLAIN
 FROM TBL
@@ -193,11 +259,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 4 (GROUP, 2)
-        Reducer 3 <- Map 5 (GROUP, 2)
+        Reducer 2 <- Map 6 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 4 <- Map 7 (GROUP, 2)
+        Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 4 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: tbl
@@ -218,7 +286,7 @@ STAGE PLANS:
-        Map 5 
+        Map 7 
            Map Operator Tree:
                 TableScan
                   alias: tbl
@@ -259,9 +327,36 @@ STAGE PLANS:
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.dest1
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: d1, d2
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: count(VALUE._col0)
                 keys: KEY._col0 (type: int), KEY._col1 (type: int)
                 mode: mergepartial
@@ -279,6 +374,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest2
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+                    outputColumnNames: d1, d2, d3
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Move Operator
@@ -291,7 +413,12 @@ STAGE PLANS:
               name: default.dest1
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2
+          Column Types: int, int
+          Table: default.dest1
   Stage: Stage-1
     Move Operator
@@ -304,7 +431,12 @@ STAGE PLANS:
               name: default.dest2
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: d1, d2, d3
+          Column Types: int, int, int
+          Table: default.dest2
 PREHOOK: query: EXPLAIN
 FROM TBL
@@ -327,11 +459,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 4 (GROUP, 2)
-        Reducer 3 <- Map 5 (GROUP, 2)
+        Reducer 2 <- Map 6 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 4 <- Map 7 (GROUP, 2)
+        Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 4 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: tbl
@@ -352,7 +486,7 @@ STAGE
PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) - Map 5 + Map 7 Map Operator Tree: TableScan alias: tbl @@ -393,9 +527,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -413,6 +574,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -425,7 +613,12 @@ STAGE PLANS: 
name: default.dest3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 Stage: Stage-1 Move Operator @@ -438,7 +631,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 PREHOOK: query: EXPLAIN FROM TBL @@ -461,7 +659,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -479,7 +680,35 @@ STAGE PLANS: Map-reduce partition columns: c1 (type: int), c2 (type: int), c3 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: c4 (type: int) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -501,6 +730,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 
(type: int) @@ -519,6 +761,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Stage: Stage-0 Move Operator @@ -531,7 +800,12 @@ STAGE PLANS: name: default.dest3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 Stage: Stage-1 Move Operator @@ -544,7 +818,12 @@ STAGE PLANS: name: default.dest4 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest4 PREHOOK: query: EXPLAIN FROM TBL @@ -571,12 +850,36 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 5 (GROUP, 2) - Reducer 3 <- Map 6 (GROUP, 2) - Reducer 4 <- Map 7 (GROUP, 2) + Reducer 2 <- Map 8 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 9 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: int) + outputColumnNames: c1, c2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(c2) + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) + Map 8 Map Operator Tree: TableScan alias: tbl @@ -597,7 +900,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value 
expressions: _col3 (type: bigint) - Map 6 + Map 9 Map Operator Tree: TableScan alias: tbl @@ -618,27 +921,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col2 (type: bigint) - Map 7 - Map Operator Tree: - TableScan - alias: tbl - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: c1 (type: int), c2 (type: int) - outputColumnNames: c1, c2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(c2) - keys: c1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -659,9 +941,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll'), compute_stats(d4, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1696 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -679,7 +988,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll'), compute_stats(d3, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: 
Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -699,6 +1035,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 'hll'), compute_stats(d2, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -711,7 +1074,12 @@ STAGE PLANS: name: default.dest3 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 Stage: Stage-1 Move Operator @@ -724,7 +1092,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 Stage: Stage-2 Move Operator @@ -737,5 +1110,10 @@ STAGE PLANS: name: default.dest1 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 diff --git ql/src/test/results/clientpositive/spark/orc_merge1.q.out ql/src/test/results/clientpositive/spark/orc_merge1.q.out index 1407616c01..2ca5a5f125 100644 --- ql/src/test/results/clientpositive/spark/orc_merge1.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge1.q.out @@ -58,6 +58,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -77,6 +79,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -92,7 +129,12 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -136,6 +178,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -155,6 +199,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -179,7 +258,12 @@ STAGE PLANS: name: default.orcfile_merge1b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-3 Spark @@ -259,6 +343,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -278,6 +364,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -302,7 +423,12 @@ STAGE PLANS: name: default.orcfile_merge1c Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: 
default.orcfile_merge1c Stage: Stage-3 Spark diff --git ql/src/test/results/clientpositive/spark/orc_merge2.q.out ql/src/test/results/clientpositive/spark/orc_merge2.q.out index b7f1a65e22..d044d744a7 100644 --- ql/src/test/results/clientpositive/spark/orc_merge2.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge2.q.out @@ -37,6 +37,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -56,6 +58,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -81,7 +118,12 @@ STAGE PLANS: name: default.orcfile_merge2a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a Stage: Stage-3 Spark diff --git ql/src/test/results/clientpositive/spark/orc_merge3.q.out ql/src/test/results/clientpositive/spark/orc_merge3.q.out index 81a6013d1e..e5f4437ceb 100644 --- ql/src/test/results/clientpositive/spark/orc_merge3.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge3.q.out @@ -67,6 +67,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -86,6 +88,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: 
default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -107,7 +136,12 @@ STAGE PLANS: name: default.orcfile_merge3b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b Stage: Stage-3 Spark diff --git ql/src/test/results/clientpositive/spark/orc_merge4.q.out ql/src/test/results/clientpositive/spark/orc_merge4.q.out index 8d433b031a..5798124de8 100644 --- ql/src/test/results/clientpositive/spark/orc_merge4.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge4.q.out @@ -85,6 +85,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,6 +106,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -125,7 +154,12 @@ STAGE PLANS: name: default.orcfile_merge3b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + 
Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b Stage: Stage-3 Spark diff --git ql/src/test/results/clientpositive/spark/orc_merge5.q.out ql/src/test/results/clientpositive/spark/orc_merge5.q.out index 1f8c869574..d162e99200 100644 --- ql/src/test/results/clientpositive/spark/orc_merge5.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge5.q.out @@ -34,6 +34,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -57,6 +59,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,7 +98,12 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY @@ -122,6 +156,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -145,6 +181,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), 
compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -166,7 +229,12 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b Stage: Stage-3 Spark @@ -287,7 +355,8 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5b concatenate PREHOOK: type: ALTER_TABLE_MERGE diff --git ql/src/test/results/clientpositive/spark/orc_merge6.q.out ql/src/test/results/clientpositive/spark/orc_merge6.q.out index be62faed0e..7760312cc8 100644 --- ql/src/test/results/clientpositive/spark/orc_merge6.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge6.q.out @@ -34,6 +34,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -57,6 +59,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -72,7 +109,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY @@ -167,6 +209,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -190,6 +234,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 
(type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -214,7 +293,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a Stage: Stage-3 Spark @@ -422,7 +506,8 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5a partition(year="2000",hour=24) concatenate PREHOOK: type: ALTER_PARTITION_MERGE diff --git ql/src/test/results/clientpositive/spark/orc_merge7.q.out ql/src/test/results/clientpositive/spark/orc_merge7.q.out index 01e3eac54a..d2097919a3 100644 --- ql/src/test/results/clientpositive/spark/orc_merge7.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge7.q.out @@ -34,6 +34,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -53,6 +55,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 
Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,7 +104,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 PREHOOK: type: QUERY @@ -201,6 +243,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -220,6 +264,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -243,7 +322,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: 
userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a Stage: Stage-3 Spark @@ -528,7 +612,8 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate PREHOOK: type: ALTER_PARTITION_MERGE diff --git ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out index 1407616c01..2ca5a5f125 100644 --- ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out @@ -58,6 +58,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -77,6 +79,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -92,7 +129,12 @@ STAGE PLANS: name: default.orcfile_merge1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part @@ -136,6 +178,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -155,6 +199,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: 
default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -179,7 +258,12 @@ STAGE PLANS: name: default.orcfile_merge1b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b Stage: Stage-3 Spark @@ -259,6 +343,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -278,6 +364,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: 
_col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -302,7 +423,12 @@ STAGE PLANS: name: default.orcfile_merge1c Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c Stage: Stage-3 Spark diff --git ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out index 65790c4962..eaffdebb0a 100644 --- ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out @@ -34,6 +34,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -56,6 +58,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2408 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -68,7 +97,12 @@ STAGE PLANS: name: default.orc_merge5b Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: 
default.orc_merge5b PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out index 52973c87bb..28b292f7db 100644 --- ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out +++ ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out @@ -34,6 +34,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 4) #### A masked pattern was here #### Vertices: Map 1 @@ -53,6 +55,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 'hll'), compute_stats(string1, 'hll'), compute_stats(subtype, 'hll'), compute_stats(decimal1, 'hll'), compute_stats(ts, 'hll') + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,7 +104,12 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY @@ -280,7 +322,8 @@ STAGE PLANS: name: default.orc_merge5a Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic 
Stats Work: PREHOOK: query: alter table orc_merge5a partition(st=80.0) concatenate PREHOOK: type: ALTER_PARTITION_MERGE diff --git ql/src/test/results/clientpositive/spark/parallel.q.out ql/src/test/results/clientpositive/spark/parallel.q.out index e31fcf0b5a..79cf68c8a6 100644 --- ql/src/test/results/clientpositive/spark/parallel.q.out +++ ql/src/test/results/clientpositive/spark/parallel.q.out @@ -36,7 +36,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Reducer 7 (GROUP, 1) + Reducer 6 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -70,7 +73,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -87,6 +118,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -100,6 +144,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Reducer 7 + Reduce Operator Tree: + Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), 
KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -112,7 +178,12 @@ STAGE PLANS: name: default.src_a Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a Stage: Stage-1 Move Operator @@ -125,7 +196,12 @@ STAGE PLANS: name: default.src_b Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b PREHOOK: query: from (select key, value from src group by key, value) s insert overwrite table src_a select s.key, s.value group by s.key, s.value diff --git ql/src/test/results/clientpositive/spark/parallel_join1.q.out ql/src/test/results/clientpositive/spark/parallel_join1.q.out index 7fdd48d2ca..22c698d8e9 100644 --- ql/src/test/results/clientpositive/spark/parallel_join1.q.out +++ ql/src/test/results/clientpositive/spark/parallel_join1.q.out @@ -23,7 +23,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +44,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -83,6 +84,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,7 +123,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/spark/parallel_orderby.q.out ql/src/test/results/clientpositive/spark/parallel_orderby.q.out index 483e42d642..de92027d65 100644 --- ql/src/test/results/clientpositive/spark/parallel_orderby.q.out +++ ql/src/test/results/clientpositive/spark/parallel_orderby.q.out @@ -85,7 +85,8 @@ STAGE PLANS: name: default.total_ordered Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table total_ordered as select * from src5 order by key, value PREHOOK: type: CREATETABLE_AS_SELECT diff --git ql/src/test/results/clientpositive/spark/pcr.q.out ql/src/test/results/clientpositive/spark/pcr.q.out index 3ed71afc4d..3fee500704 100644 --- ql/src/test/results/clientpositive/spark/pcr.q.out +++ ql/src/test/results/clientpositive/spark/pcr.q.out @@ -96,7 +96,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -143,7 +143,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -291,7 +291,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -338,7 +338,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -385,7 +385,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -571,7 +571,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -618,7 +618,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -768,7 +768,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -815,7 +815,7 @@ STAGE PLANS: partition 
values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -967,7 +967,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1014,7 +1014,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1061,7 +1061,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1224,7 +1224,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1271,7 +1271,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1318,7 +1318,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1488,7 +1488,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1535,7 +1535,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1668,7 +1668,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1715,7 +1715,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1888,7 +1888,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1935,7 +1935,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1982,7 
+1982,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2182,7 +2182,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2229,7 +2229,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2370,7 +2370,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2445,7 +2445,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2643,7 +2643,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2718,7 +2718,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2925,7 +2925,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2972,7 +2972,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3019,7 +3019,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3066,7 +3066,7 @@ STAGE PLANS: partition values: ds 2000-04-11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3251,7 +3251,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3298,7 +3298,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 
column.name.delimiter , columns key,value @@ -3345,7 +3345,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3515,9 +3515,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: pcr_t1 @@ -3559,6 +3562,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3606,7 +3625,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3647,6 +3666,144 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Map 5 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string 
value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3677,8 +3834,14 @@ STAGE PLANS: name: default.pcr_t2 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true Stage: Stage-1 Move Operator @@ -3709,8 +3872,14 @@ STAGE PLANS: name: default.pcr_t3 Stage: Stage-4 - 
Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' @@ -3752,9 +3921,12 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: pcr_t1 @@ -3779,7 +3951,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3800,6 +3972,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 2 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (key = 3) (type: boolean) @@ -3819,7 +4007,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3851,7 +4039,89 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows 20 + partition_columns ds + partition_columns.types string + rawDataSize 160 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + Map 5 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 3) (type: boolean) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 3 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3892,6 +4162,66 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3902,7 +4232,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3922,8 +4252,14 @@ STAGE PLANS: name: default.pcr_t2 Stage: Stage-3 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true Stage: Stage-1 Move Operator @@ -3934,7 +4270,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3954,8 +4290,14 @@ STAGE PLANS: name: default.pcr_t3 Stage: Stage-4 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 diff --git ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out index 12b1724990..bc8ce433dd 100644 --- ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out +++ ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out @@ -47,18 +47,23 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0, Stage-3 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-3 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-2, Stage-3 Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 7 (GROUP, 1) + Reducer 4 <- Reducer 8 (GROUP, 1) + Reducer 5 <- Reducer 9 (GROUP, 2) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -75,7 +80,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 3 + Map 6 Map Operator 
Tree: TableScan alias: b @@ -88,14 +93,58 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -113,6 +162,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -157,6 +219,63 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) Stage: Stage-0 Move Operator @@ -169,7 +288,12 @@ STAGE PLANS: name: default.mi1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 Stage: Stage-1 Move Operator @@ -182,7 +306,12 @@ STAGE PLANS: name: default.mi2 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 Stage: Stage-2 Move Operator @@ -198,7 +327,12 @@ STAGE PLANS: name: default.mi3 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 Stage: Stage-3 Move Operator @@ -1320,18 +1454,23 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-0 + Stage-5 depends on 
stages: Stage-0, Stage-3 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-3 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-2, Stage-3 Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 7 (GROUP, 1) + Reducer 4 <- Reducer 8 (GROUP, 1) + Reducer 5 <- Reducer 9 (GROUP, 2) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1348,7 +1487,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 3 + Map 6 Map Operator Tree: TableScan alias: b @@ -1361,14 +1500,58 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: 
COMPLETE Column stats: NONE Filter Operator @@ -1386,6 +1569,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1430,6 +1626,63 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 
61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) Stage: Stage-0 Move Operator @@ -1442,7 +1695,12 @@ STAGE PLANS: name: default.mi1 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 Stage: Stage-1 Move Operator @@ -1455,7 +1713,12 @@ STAGE PLANS: name: default.mi2 Stage: Stage-6 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 Stage: Stage-2 Move Operator @@ -1471,7 +1734,12 @@ STAGE PLANS: name: default.mi3 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 Stage: Stage-3 Move Operator diff --git ql/src/test/results/clientpositive/spark/ptf.q.out ql/src/test/results/clientpositive/spark/ptf.q.out index 82fc9f8198..5da3038d20 100644 --- ql/src/test/results/clientpositive/spark/ptf.q.out +++ ql/src/test/results/clientpositive/spark/ptf.q.out @@ -2872,11 +2872,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2942,8 +2944,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double) + outputColumnNames: p_mfgr, p_name, p_size, r, dr, s + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(s, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2576 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reducer 4 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 @@ -2978,7 +3007,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: sum_window_0 (type: bigint), _col5 (type: int) - Reducer 5 + Reducer 6 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) @@ -3037,7 +3066,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int) + outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(p_mfgr, 'hll'), compute_stats(p_name, 'hll'), compute_stats(p_size, 'hll'), compute_stats(s2, 'hll'), compute_stats(r, 'hll'), compute_stats(dr, 'hll'), compute_stats(cud, 'hll'), compute_stats(fv1, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3520 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3061,7 +3117,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: int), _col7 (type: double) - Reducer 7 + Reducer 9 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) @@ -3097,7 +3153,12 @@ STAGE PLANS: name: default.part_4 Stage: Stage-3 - Stats-Aggr Operator + 
Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, r, dr, s + Column Types: string, string, int, int, int, double + Table: default.part_4 Stage: Stage-1 Move Operator @@ -3110,7 +3171,12 @@ STAGE PLANS: name: default.part_5 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Column Types: string, string, int, int, int, int, double, int + Table: default.part_5 PREHOOK: query: from noop(on part partition by p_mfgr diff --git ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out index 804ff02948..57b0e776a2 100644 --- ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out +++ ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out @@ -136,6 +136,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -167,8 +202,14 @@ STAGE PLANS: name: default.bucket5_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.bucket5_1 + Is Table Level Stats: true PREHOOK: query: insert overwrite table bucket5_1 select * from src cluster by key @@ -258,6 +299,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -346,6 +388,60 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string), '2010-03-29' (type: string) + outputColumnNames: aid, bid, t, ctime, etime, l, et, ds + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(aid, 'hll'), compute_stats(bid, 'hll'), compute_stats(t, 'hll'), compute_stats(ctime, 'hll'), compute_stats(etime, 'hll'), 
compute_stats(l, 'hll'), compute_stats(et, 'hll') + keys: '2010-03-29' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: '2010-03-29' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '2010-03-29' (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + keys: '2010-03-29' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), '2010-03-29' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types struct:struct:struct:struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -375,6 +471,12 @@ STAGE PLANS: name: default.complex_tbl_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: aid, bid, t, ctime, etime, l, et + Column Types: string, string, int, string, bigint, string, string + Table: default.complex_tbl_1 + Is Table Level Stats: false diff --git ql/src/test/results/clientpositive/spark/sample1.q.out ql/src/test/results/clientpositive/spark/sample1.q.out index ee9eb14685..fb5244abc9 100644 --- ql/src/test/results/clientpositive/spark/sample1.q.out +++ ql/src/test/results/clientpositive/spark/sample1.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: Num rows: 250 
Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(dt, 'hll'), compute_stats(hr, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1744 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -125,6 +143,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -155,8 +203,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s diff --git ql/src/test/results/clientpositive/spark/sample10.q.out ql/src/test/results/clientpositive/spark/sample10.q.out index d589216172..40a7392cb3 100644 --- ql/src/test/results/clientpositive/spark/sample10.q.out +++ ql/src/test/results/clientpositive/spark/sample10.q.out @@ -93,7 +93,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -143,7 +143,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -193,7 +193,7 @@ STAGE PLANS: ds 2008-04-09 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , @@ -243,7 +243,7 @@ STAGE PLANS: ds 2008-04-09 hr 
12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 4 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/spark/sample2.q.out ql/src/test/results/clientpositive/spark/sample2.q.out index 85266d1bd8..44df416a81 100644 --- ql/src/test/results/clientpositive/spark/sample2.q.out +++ ql/src/test/results/clientpositive/spark/sample2.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -124,6 +142,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -154,8 +202,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2) s diff --git ql/src/test/results/clientpositive/spark/sample4.q.out ql/src/test/results/clientpositive/spark/sample4.q.out index 69e7ee94f8..897833c150 100644 --- ql/src/test/results/clientpositive/spark/sample4.q.out +++ ql/src/test/results/clientpositive/spark/sample4.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: 
true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -124,6 +142,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -154,8 +202,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2 on key) s diff --git ql/src/test/results/clientpositive/spark/sample5.q.out ql/src/test/results/clientpositive/spark/sample5.q.out index 558b2dbb68..7b6b736b77 100644 --- ql/src/test/results/clientpositive/spark/sample5.q.out +++ ql/src/test/results/clientpositive/spark/sample5.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked 
pattern was here #### Path -> Partition: @@ -125,6 +143,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -155,8 +203,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* diff --git ql/src/test/results/clientpositive/spark/sample6.q.out ql/src/test/results/clientpositive/spark/sample6.q.out index 4b358291e4..685008aafb 100644 --- ql/src/test/results/clientpositive/spark/sample6.q.out +++ ql/src/test/results/clientpositive/spark/sample6.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -124,6 +142,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -154,8 +202,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s diff --git ql/src/test/results/clientpositive/spark/sample7.q.out ql/src/test/results/clientpositive/spark/sample7.q.out index eae33cad36..b43116c245 100644 --- ql/src/test/results/clientpositive/spark/sample7.q.out +++ ql/src/test/results/clientpositive/spark/sample7.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -125,6 +143,36 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -155,8 +203,14 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + 
Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s diff --git ql/src/test/results/clientpositive/spark/skewjoin.q.out ql/src/test/results/clientpositive/spark/skewjoin.q.out index b0b28c3114..2dc345a7e5 100644 --- ql/src/test/results/clientpositive/spark/skewjoin.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin.q.out @@ -80,9 +80,10 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-4 depends on stages: Stage-1 , consists of Stage-5, Stage-0 - Stage-5 - Stage-3 depends on stages: Stage-5 + Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-3 + Stage-6 + Stage-4 depends on stages: Stage-6 + Stage-3 depends on stages: Stage-4 Stage-0 depends on stages: Stage-3 Stage-2 depends on stages: Stage-0 @@ -90,7 +91,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -110,7 +111,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -151,15 +152,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Conditional Operator - Stage: Stage-5 + Stage: Stage-6 Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 7 Map Operator Tree: TableScan Spark HashTable Sink Operator @@ -169,11 +185,11 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan Map Join Operator @@ -195,9 +211,52 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work + Stage: Stage-3 + Spark + Edges: + Reducer 3 <- Map 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -209,7 +268,12 @@ STAGE PLANS: name: default.dest_j1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value diff --git ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out index 3f10ee5685..a0adab1e68 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_noskew.q.out @@ -163,7 +163,8 @@ STAGE PLANS: name: default.noskew Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table noskew as select a.* from src a join src b on a.key=b.key order by a.key limit 30 PREHOOK: type: CREATETABLE_AS_SELECT diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out index 9424361c56..5225b18d19 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out @@ -56,6 +56,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -101,7 +102,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -183,6 +184,60 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic 
stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -213,8 +268,14 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY @@ -1798,7 +1859,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1871,7 +1932,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out index d0bb917eea..8c8bebb767 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out @@ -68,6 +68,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -114,7 +115,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -197,6 +198,60 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1650 Data size: 17529 
Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -228,8 +283,14 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY @@ -284,6 +345,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -330,7 +392,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -413,6 +475,60 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3392 Data size: 36194 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3392 Data size: 36194 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + key expressions: '2' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '2' (type: string) + Statistics: Num rows: 3392 Data size: 36194 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -444,8 +560,14 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out index d8bd846a01..6197f20830 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out @@ -120,7 +120,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -143,7 +143,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -267,7 +267,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -290,7 +290,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -366,7 +366,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -389,7 +389,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out index 9ce04d8a22..60bfe5b9e6 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out @@ -82,7 +82,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -105,7 +105,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -160,7 +160,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -183,7 +183,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -388,7 +388,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -411,7 +411,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -466,7 +466,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -489,7 +489,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -642,7 +642,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -665,7 +665,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -720,7 +720,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -743,7 +743,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -896,7 +896,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -919,7 +919,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -974,7 +974,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -997,7 +997,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out index 6ed3c21a99..24091f3fe3 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out @@ -79,7 +79,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' @@ -219,6 +224,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -253,6 +259,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -267,7 +308,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + 
Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 @@ -385,7 +431,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out index fb2501597d..1371f6f22e 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out @@ -79,7 +79,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out index 6be912398e..c2de2d69f2 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out @@ -48,6 +48,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -79,6 +80,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -93,7 +129,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' @@ -214,7 +255,12 @@ STAGE PLANS: name: default.test_table3 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, int, string + Table: default.test_table3 PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' @@ -1337,6 +1383,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1368,6 +1415,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2' (type: string) + sort order: + + Map-reduce partition columns: '2' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1382,5 +1464,10 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out index 1a9118d378..042cda0afa 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out +++ 
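Note that smb_mapjoin_20 pins the stats grouping key to the static partition value ('1', then '2') and uses the numeric second argument compute_stats(col, 16), while most of the surrounding files use 'hll'. Both spellings are valid for the UDAF: the number selects how many FM-sketch bit-vectors back the NDV estimate, the string names the estimator. A hedged illustration against this test's test_table2 (columns key, value1, value2, partitioned by ds):

SELECT compute_stats(value1, 16)    FROM test_table2 WHERE ds = '1';  -- FM sketch, 16 bit-vectors
SELECT compute_stats(value1, 'hll') FROM test_table2 WHERE ds = '1';  -- HyperLogLog estimator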
ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out @@ -79,7 +79,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -117,6 +122,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -148,6 +154,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -162,7 +203,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -200,6 +246,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -230,6 +277,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -244,7 +326,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -282,6 +369,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -313,6 +401,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -327,7 +450,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + 
Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -365,6 +493,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -396,6 +525,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -410,7 +574,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE @@ -448,6 +617,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -478,6 +648,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
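Each of these smb_mapjoin_21 plans gains the same stats branch: a Select projecting the inserted rows plus the partition column, a hash-mode Group By keyed on ds, and a new Reducer 3 that merges the partials. Informally, alongside the INSERT the engine now also computes something like the following (a sketch, not the literal rewrite; the real branch feeds the Stats Work stage rather than a result set):

SELECT ds,
       compute_stats(key, 'hll'),
       compute_stats(value, 'hll')
FROM test_table2
WHERE ds = '1'
GROUP BY ds;
-- 'hll' selects the HyperLogLog NDV estimator; grouping on the partition
-- column yields one statistics row per written partition.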
Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -492,5 +697,10 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out index d8bdef2d25..80e0f95b37 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out @@ -75,7 +75,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 @@ -218,7 +223,12 @@ STAGE PLANS: name: default.test_table2 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out index 7cf3cf7e64..c1ad237599 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out @@ -71,7 +71,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -92,7 +93,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -128,6 +129,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 
'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -140,7 +168,12 @@ STAGE PLANS: name: default.smb_join_results Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key @@ -1243,7 +1276,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1264,7 +1298,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1300,6 +1334,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1312,7 +1373,12 @@ STAGE PLANS: name: default.smb_join_results Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key @@ -2431,7 +2497,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2452,7 +2519,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -2488,6 +2555,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2500,7 +2594,12 @@ STAGE PLANS: name: default.smb_join_results Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 @@ -2535,7 +2634,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2556,7 +2656,7 @@ STAGE PLANS: Map-reduce partition columns: 
_col0 (type: int) Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -2592,6 +2692,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2604,7 +2731,12 @@ STAGE PLANS: name: default.smb_join_results Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out index 622b950cef..7581b425e0 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out @@ -622,6 +622,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -649,6 +651,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(v1, 'hll'), compute_stats(k2, 'hll'), compute_stats(v2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1728 Basic stats: COMPLETE Column stats: 
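For the unpartitioned smb_join_results target of smb_mapjoin_6 and smb_mapjoin_7 the added branch has no grouping key: each mapper emits one hash-mode partial over k1, v1, k2, v2 and a single new reducer merges them. A rough standalone equivalent built from this test's join (column aliases as in the plan):

SELECT compute_stats(k1, 'hll'), compute_stats(v1, 'hll'),
       compute_stats(k2, 'hll'), compute_stats(v2, 'hll')
FROM (
  SELECT a.key AS k1, a.value AS v1, b.key AS k2, b.value AS v2
  FROM smb_bucket4_1 a JOIN smb_bucket4_2 b ON a.key = b.key
) joined;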
NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -661,7 +690,12 @@ STAGE PLANS: name: default.smb_join_results Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results PREHOOK: query: insert overwrite table smb_join_results select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key diff --git ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out index 450499496a..0c29607e2e 100644 --- ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out @@ -106,7 +106,8 @@ STAGE PLANS: name: default.srcpart_date Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table srcpart_date as select ds as ds, ds as `date` from srcpart group by ds PREHOOK: type: CREATETABLE_AS_SELECT diff --git ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index 7cafb52b24..d104162b36 100644 --- ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -108,7 +108,8 @@ STAGE PLANS: name: default.srcpart_date Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: create table srcpart_date stored as orc as select ds as ds, ds as `date` from srcpart group by ds PREHOOK: type: CREATETABLE_AS_SELECT diff --git ql/src/test/results/clientpositive/spark/stats0.q.out ql/src/test/results/clientpositive/spark/stats0.q.out index ece5f08fbd..1883873ea0 100644 --- ql/src/test/results/clientpositive/spark/stats0.q.out +++ ql/src/test/results/clientpositive/spark/stats0.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,6 +68,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + 
Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -117,6 +135,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -147,8 +195,14 @@ STAGE PLANS: name: default.stats_non_partitioned Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true PREHOOK: query: insert overwrite table stats_non_partitioned select * from src @@ -704,6 +758,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -723,6 +779,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 
250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -737,7 +828,12 @@ STAGE PLANS: name: default.stats_partitioned Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1') select * from src @@ -1340,6 +1436,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1384,6 +1482,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1435,6 +1549,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1465,8 +1609,14 @@ STAGE PLANS: name: default.stats_non_partitioned Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true PREHOOK: query: insert overwrite table stats_non_partitioned select * from src @@ -2022,6 +2172,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2041,6 +2193,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
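stats0 shows both shapes side by side: the same pair of INSERTs from the test now auto-gathers column statistics, table-level for the non-partitioned target (hence "Is Table Level Stats: true" in its Stats Work) and keyed on the static value '1' for the partitioned one.

INSERT OVERWRITE TABLE stats_non_partitioned SELECT * FROM src;
-- one table-level stats row covering (key, value)
INSERT OVERWRITE TABLE stats_partitioned PARTITION (ds='1') SELECT * FROM src;
-- stats keyed on ds, stored against partition ds='1'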
default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2055,7 +2242,12 @@ STAGE PLANS: name: default.stats_partitioned Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1') select * from src diff --git ql/src/test/results/clientpositive/spark/stats1.q.out ql/src/test/results/clientpositive/spark/stats1.q.out index 13690dbbba..b30620bd7a 100644 --- ql/src/test/results/clientpositive/spark/stats1.q.out +++ ql/src/test/results/clientpositive/spark/stats1.q.out @@ -30,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 4 (GROUP, 1), Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -48,7 +49,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -65,6 +66,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -84,6 +98,33 @@ 
STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -96,7 +137,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable PREHOOK: query: INSERT OVERWRITE TABLE tmptable SELECT unionsrc.key, unionsrc.value @@ -170,7 +216,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 26 rawDataSize 199 diff --git ql/src/test/results/clientpositive/spark/stats10.q.out ql/src/test/results/clientpositive/spark/stats10.q.out index 9c682fce76..b9e3ab9e69 100644 --- ql/src/test/results/clientpositive/spark/stats10.q.out +++ ql/src/test/results/clientpositive/spark/stats10.q.out @@ -24,6 +24,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -54,6 +55,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket3_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: 
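In stats1 the target tmptable is filled from a union, so the plan above carries two identical hash-mode compute_stats branches (one per union leg) and a single Reducer 3 that merges both partial streams into one statistics row. A hypothetical way to inspect the merged result (the listed fields are the usual string-column stats; exact output layout varies by version):

DESCRIBE FORMATTED tmptable key;
-- expect max/avg column length, distinct-value and null counts covering
-- rows from both branches, matching the COLUMN_STATS entry asserted above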
KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -68,7 +104,12 @@ STAGE PLANS: name: default.bucket3_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src @@ -383,7 +424,8 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table bucket3_1 partition (ds) compute statistics PREHOOK: type: QUERY @@ -423,7 +465,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -462,7 +504,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/spark/stats12.q.out ql/src/test/results/clientpositive/spark/stats12.q.out index 460cf1d537..19936de7d9 100644 --- ql/src/test/results/clientpositive/spark/stats12.q.out +++ ql/src/test/results/clientpositive/spark/stats12.q.out @@ -153,8 +153,9 @@ STAGE PLANS: /analyze_srcpart/ds=2008-04-08/hr=12 [analyze_srcpart] Stage: Stage-1 - Stats-Aggr Operator - Stats Aggregation Key Prefix: default.analyze_srcpart/ + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.analyze_srcpart/ PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/stats13.q.out ql/src/test/results/clientpositive/spark/stats13.q.out index 859bb6ba99..2058e74deb 100644 --- ql/src/test/results/clientpositive/spark/stats13.q.out +++ ql/src/test/results/clientpositive/spark/stats13.q.out @@ -107,8 +107,9 @@ STAGE PLANS: /analyze_srcpart/ds=2008-04-08/hr=11 [analyze_srcpart] Stage: Stage-1 - Stats-Aggr Operator - Stats Aggregation Key Prefix: default.analyze_srcpart/ + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.analyze_srcpart/ PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/stats14.q.out ql/src/test/results/clientpositive/spark/stats14.q.out index 85017462c3..1cd660cd67 100644 --- ql/src/test/results/clientpositive/spark/stats14.q.out +++ 
ql/src/test/results/clientpositive/spark/stats14.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/spark/stats15.q.out ql/src/test/results/clientpositive/spark/stats15.q.out index 85017462c3..1cd660cd67 100644 --- ql/src/test/results/clientpositive/spark/stats15.q.out +++ ql/src/test/results/clientpositive/spark/stats15.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/spark/stats18.q.out ql/src/test/results/clientpositive/spark/stats18.q.out index 4945808098..de8918a40a 100644 --- ql/src/test/results/clientpositive/spark/stats18.q.out +++ ql/src/test/results/clientpositive/spark/stats18.q.out @@ -39,7 +39,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/spark/stats2.q.out ql/src/test/results/clientpositive/spark/stats2.q.out index 0272b097e5..49b3da843b 100644 --- ql/src/test/results/clientpositive/spark/stats2.q.out +++ ql/src/test/results/clientpositive/spark/stats2.q.out @@ -140,7 +140,8 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 23248 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_t1 partition (ds, hr) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/stats3.q.out ql/src/test/results/clientpositive/spark/stats3.q.out index 2f76d0e21d..76f3fb22f3 100644 --- ql/src/test/results/clientpositive/spark/stats3.q.out +++ 
ql/src/test/results/clientpositive/spark/stats3.q.out @@ -54,7 +54,8 @@ STAGE PLANS: name: default.hive_test_src Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: load data local inpath '../../data/files/test.dat' overwrite into table hive_test_src PREHOOK: type: LOAD diff --git ql/src/test/results/clientpositive/spark/stats5.q.out ql/src/test/results/clientpositive/spark/stats5.q.out index 95293c0071..bb28bdb6dd 100644 --- ql/src/test/results/clientpositive/spark/stats5.q.out +++ ql/src/test/results/clientpositive/spark/stats5.q.out @@ -30,7 +30,8 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_src compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/stats7.q.out ql/src/test/results/clientpositive/spark/stats7.q.out index 3605f38a1f..75b95a8947 100644 --- ql/src/test/results/clientpositive/spark/stats7.q.out +++ ql/src/test/results/clientpositive/spark/stats7.q.out @@ -53,7 +53,8 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 23248 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/stats8.q.out ql/src/test/results/clientpositive/spark/stats8.q.out index 4bcbcfde68..971290aa58 100644 --- ql/src/test/results/clientpositive/spark/stats8.q.out +++ ql/src/test/results/clientpositive/spark/stats8.q.out @@ -53,7 +53,8 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 23248 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics PREHOOK: type: QUERY @@ -168,7 +169,8 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=12) compute statistics PREHOOK: type: QUERY @@ -242,7 +244,8 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-09',hr=11) compute statistics PREHOOK: type: QUERY @@ -316,7 +319,8 @@ STAGE PLANS: Statistics: Num rows: 1500 Data size: 15936 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-09',hr=12) compute statistics PREHOOK: type: QUERY @@ -390,7 +394,8 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds, hr) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/stats9.q.out ql/src/test/results/clientpositive/spark/stats9.q.out index beb025c635..fa602f1312 100644 --- ql/src/test/results/clientpositive/spark/stats9.q.out +++ ql/src/test/results/clientpositive/spark/stats9.q.out @@ -36,7 +36,8 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 11603 Basic stats: 
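The plain ANALYZE plans in stats2 through stats9 change only cosmetically: the stage is renamed from Stats-Aggr Operator to Stats Work, and since COMPUTE STATISTICS without FOR COLUMNS still gathers basic stats only, no Column Stats Desc appears. Statements taken from these tests, with the column-stats counterpart shown for contrast:

ANALYZE TABLE analyze_srcpart PARTITION (ds='2008-04-08', hr=11) COMPUTE STATISTICS;
-- basic stats only: Stats Work / Basic Stats Work
ANALYZE TABLE analyze_srcpart PARTITION (ds='2008-04-08', hr=11) COMPUTE STATISTICS FOR COLUMNS;
-- explicit column statistics; also populates the COLUMN_STATS map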
COMPLETE Column stats: COMPLETE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcbucket compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/stats_noscan_1.q.out ql/src/test/results/clientpositive/spark/stats_noscan_1.q.out index ad2ca94baa..a36a0b9a20 100644 --- ql/src/test/results/clientpositive/spark/stats_noscan_1.q.out +++ ql/src/test/results/clientpositive/spark/stats_noscan_1.q.out @@ -44,7 +44,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics noscan PREHOOK: type: QUERY @@ -315,7 +316,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart_partial PARTITION(ds='2008-04-08') compute statistics noscan PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/stats_partscan_1_23.q.out ql/src/test/results/clientpositive/spark/stats_partscan_1_23.q.out index 50096a5a8d..403bc28cfa 100644 --- ql/src/test/results/clientpositive/spark/stats_partscan_1_23.q.out +++ ql/src/test/results/clientpositive/spark/stats_partscan_1_23.q.out @@ -89,7 +89,8 @@ STAGE PLANS: Partial Scan Statistics Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart_partial_scan PARTITION(ds='2008-04-08',hr=11) compute statistics partialscan PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/statsfs.q.out ql/src/test/results/clientpositive/spark/statsfs.q.out index d070e9aa6f..16da73418f 100644 --- ql/src/test/results/clientpositive/spark/statsfs.q.out +++ ql/src/test/results/clientpositive/spark/statsfs.q.out @@ -176,7 +176,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -215,7 +215,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -343,7 +343,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -426,7 +426,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -466,7 +466,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 
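
The describe output above shows COLUMN_STATS_ACCURATE widening from a basic-stats flag into a JSON object that also records per-column accuracy, e.g. {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}. A minimal sketch of reading that per-column flag out of a table parameter (assuming Jackson is on the classpath; Hive's own parser differs):

    import com.fasterxml.jackson.databind.JsonNode;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class ColumnStatsFlag {
        // True when COLUMN_STATS_ACCURATE marks the given column as accurate.
        static boolean isColumnAccurate(String param, String col) throws Exception {
            JsonNode root = new ObjectMapper().readTree(param);
            return "true".equals(root.path("COLUMN_STATS").path(col).asText());
        }

        public static void main(String[] args) throws Exception {
            String p = "{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}";
            System.out.println(isColumnAccurate(p, "key"));   // true
            System.out.println(isColumnAccurate(p, "ds"));    // false
        }
    }
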
a0adcaee08..7cdcc4e7d6 100644 --- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -68,14 +68,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Map 9 (GROUP, 1) + Reducer 2 <- Map 11 (PARTITION-LEVEL SORT, 1), Reducer 10 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 9 <- Map 8 (GROUP, 1) + Reducer 5 <- Map 12 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 11 Map Operator Tree: TableScan alias: b @@ -84,7 +85,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) - Map 11 + Map 12 Map Operator Tree: TableScan alias: b @@ -97,7 +98,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 7 Map Operator Tree: TableScan alias: a @@ -119,7 +120,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 8 Map Operator Tree: TableScan alias: s1 @@ -136,7 +137,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 9 Map Operator Tree: TableScan alias: s1 @@ -155,6 +156,26 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: 0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -207,6 +228,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
_col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: Join Operator @@ -225,26 +266,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 - Reducer 9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: 0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -257,7 +305,12 @@ STAGE PLANS: name: default.src_5 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 Stage: Stage-0 Move Operator @@ -270,7 +323,12 @@ STAGE PLANS: name: default.src_4 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 Warning: Shuffle Join JOIN[31][tables = [sq_2_notin_nullcheck]] in Work 'Reducer 2' is a cross product PREHOOK: query: from src b @@ -312,8 +370,8 @@ POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:strin RUN: Stage-2:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE -RUN: Stage-3:STATS -RUN: Stage-4:STATS +RUN: Stage-3:COLUMNSTATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: 
default@src_4 @@ -491,7 +549,8 @@ INSERT OVERWRITE TABLE src_5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 @@ -501,10 +560,10 @@ STAGE PLANS: Stage: Stage-5 Spark Edges: - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: a @@ -527,7 +586,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 5 Map Operator Tree: TableScan alias: s1 @@ -545,12 +604,102 @@ STAGE PLANS: 1 _col0 (type: string) Local Work: Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: 0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-6 + Spark + Edges: + Reducer 7 <- Map 6 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator predicate: ((key > '2') and key is null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -564,7 +713,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -591,10 +740,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 2 <- Map 8 (GROUP, 1) + Reducer 3 <- Map 9 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 8 Map Operator Tree: TableScan alias: b @@ -610,7 +760,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -620,6 +770,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Local Work: + Map Reduce Local Work + Map 9 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -628,7 +798,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 6 + 1 Reducer 7 Statistics: Num rows: 500 Data size: 9812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -638,7 +808,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 550 Data size: 10793 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col5 is null (type: boolean) @@ -656,6 +826,20 @@ STAGE PLANS: Map Reduce Local Work Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: Select Operator 
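
Throughout these plans the new column-stats branch is the same two-phase aggregation: a map-side Group By Operator in mode hash emits one partial compute_stats struct per column, and a reducer in mode mergepartial folds the partials together (the Group By Operator over VALUE._col0, VALUE._col1 above) before the Stats Work stage persists them. A toy stand-in for that partial/merge split — the fields here are illustrative only; Hive's real struct also carries an NDV sketch:

    import java.util.Arrays;
    import java.util.List;

    public class PartialColumnStats {
        long count, nulls, maxLen;

        // map-side "mode: hash": fold one value into a partial
        void update(String v) {
            count++;
            if (v == null) nulls++; else maxLen = Math.max(maxLen, v.length());
        }

        // reducer-side "mode: mergepartial": combine two partials
        void merge(PartialColumnStats o) {
            count += o.count; nulls += o.nulls; maxLen = Math.max(maxLen, o.maxLen);
        }

        public static void main(String[] args) {
            List<String> split1 = Arrays.asList("val_90", null, "val_92");
            List<String> split2 = Arrays.asList("val_95", "val_98");
            PartialColumnStats p1 = new PartialColumnStats(), p2 = new PartialColumnStats();
            split1.forEach(p1::update);
            split2.forEach(p2::update);
            p1.merge(p2);  // what the stats reducer does with each VALUE._colN
            System.out.printf("count=%d nulls=%d maxLen=%d%n", p1.count, p1.nulls, p1.maxLen);
        }
    }
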
expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -668,6 +852,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -680,7 +884,12 @@ STAGE PLANS: name: default.src_4 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 Stage: Stage-1 Move Operator @@ -693,7 +902,12 @@ STAGE PLANS: name: default.src_5 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 Warning: Map Join MAPJOIN[47][bigTable=b] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b @@ -733,11 +947,12 @@ POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:strin POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-5:MAPRED +RUN: Stage-6:MAPRED RUN: Stage-2:MAPRED RUN: Stage-0:MOVE RUN: Stage-1:MOVE -RUN: Stage-3:STATS -RUN: Stage-4:STATS +RUN: Stage-3:COLUMNSTATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -746,17 +961,6 @@ POSTHOOK: query: select * from src_4 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_4 #### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 val_97 -98 val_98 -98 val_98 PREHOOK: query: select * from src_5 PREHOOK: type: QUERY PREHOOK: Input: default@src_5 diff --git ql/src/test/results/clientpositive/spark/temp_table.q.out ql/src/test/results/clientpositive/spark/temp_table.q.out index c2ec3b4e21..342d08c1a2 100644 --- ql/src/test/results/clientpositive/spark/temp_table.q.out +++ ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -52,7 +52,8 @@ STAGE PLANS: isTemporary: true Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: CREATE TEMPORARY TABLE foo AS SELECT * FROM src WHERE key % 2 = 0 PREHOOK: type: CREATETABLE_AS_SELECT @@ -118,7 +119,8 @@ STAGE PLANS: isTemporary: true Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: CREATE TEMPORARY TABLE bar AS SELECT * FROM src WHERE key % 2 = 1 PREHOOK: type: CREATETABLE_AS_SELECT diff 
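
The 'hll' argument to compute_stats appears to select the distinct-value estimator used inside those partial structs (the spark/union28 plan further down still shows the older numeric form, compute_stats(key, 16)). As a rough illustration of why a mergeable sketch is used rather than an exact distinct count, here is a toy k-minimum-values estimator; it is not Hive's HyperLogLog, just the same idea of estimating NDV from hashed values in constant space:

    import java.nio.charset.StandardCharsets;
    import java.util.TreeSet;

    public class ToyNdvSketch {
        static final int K = 64;
        final TreeSet<Double> smallest = new TreeSet<>();

        void add(String v) {
            smallest.add(normalizedHash(v));
            if (smallest.size() > K) smallest.pollLast(); // keep only the K smallest hashes
        }

        double estimate() {
            if (smallest.size() < K) return smallest.size(); // exact while small
            return (K - 1) / smallest.last();                // KMV estimate
        }

        // 64-bit FNV-1a, normalized into [0, 1)
        static double normalizedHash(String v) {
            long h = 0xcbf29ce484222325L;
            for (byte b : v.getBytes(StandardCharsets.UTF_8)) {
                h ^= (b & 0xffL);
                h *= 0x100000001b3L;
            }
            return (h >>> 11) / (double) (1L << 53);
        }

        public static void main(String[] args) {
            ToyNdvSketch s = new ToyNdvSketch();
            for (int i = 0; i < 10000; i++) s.add("val_" + (i % 500));
            System.out.printf("estimated NDV ~ %.0f (true 500)%n", s.estimate());
        }
    }
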
--git ql/src/test/results/clientpositive/spark/union10.q.out ql/src/test/results/clientpositive/spark/union10.q.out index ea1bebb8b7..74c9ed7530 100644 --- ql/src/test/results/clientpositive/spark/union10.q.out +++ ql/src/test/results/clientpositive/spark/union10.q.out @@ -32,8 +32,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +53,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -68,7 +69,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -107,7 +108,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -130,7 +158,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +194,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -165,7 +219,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 diff --git ql/src/test/results/clientpositive/spark/union12.q.out ql/src/test/results/clientpositive/spark/union12.q.out index 063995660c..aa46ba9157 100644 --- ql/src/test/results/clientpositive/spark/union12.q.out +++ ql/src/test/results/clientpositive/spark/union12.q.out @@ -32,8 +32,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +53,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -68,7 +69,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -107,7 +108,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce 
Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -130,7 +158,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +194,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -165,7 +219,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 diff --git ql/src/test/results/clientpositive/spark/union17.q.out ql/src/test/results/clientpositive/spark/union17.q.out index a967c0763a..19eb392582 100644 --- ql/src/test/results/clientpositive/spark/union17.q.out +++ ql/src/test/results/clientpositive/spark/union17.q.out @@ -39,10 +39,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 8 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2), Reducer 9 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 8 <- Map 1 (GROUP, 1) - Reducer 9 <- Map 1 (GROUP, 1) + Reducer 10 <- Map 1 (GROUP, 1) + Reducer 11 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2), Reducer 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2), Reducer 11 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -61,7 +63,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -81,7 +83,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Map 7 + Map 9 Map Operator Tree: 
TableScan alias: s2 @@ -101,6 +103,50 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL Reducer 3 Reduce Operator Tree: Group By Operator @@ -121,9 +167,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -141,50 +214,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 8 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Reducer 9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -197,7 +253,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -210,7 +271,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/spark/union18.q.out ql/src/test/results/clientpositive/spark/union18.q.out index 653c54c76e..cecd448ca6 100644 --- ql/src/test/results/clientpositive/spark/union18.q.out +++ ql/src/test/results/clientpositive/spark/union18.q.out @@ -39,7 +39,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Reducer 8 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1), Reducer 9 (GROUP, 1) + Reducer 8 <- Map 1 (GROUP, 1) + Reducer 9 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -58,7 +61,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 6 Map Operator Tree: TableScan alias: s2 @@ -76,6 +79,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -87,7 +103,61 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 2 + Map 7 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + 
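
For multi-insert queries the plan gathers stats for every destination in the same job: dest1 gets compute_stats over (key, value) and dest2 over (key, val1, val2), each feeding its own mergepartial reducer and its own Column Stats Desc. Collecting several columns in one pass is essentially an array of per-column partials, sketched below with made-up row values:

    public class MultiColumnPass {
        public static void main(String[] args) {
            String[][] rows = { {"128", "val_128", "val_128"},
                                {"146", "val_146", "val_146"},
                                {"150", null,      "val_150"} };
            long[] nulls = new long[3];   // one slot per column: key, val1, val2
            long[] maxLen = new long[3];
            for (String[] row : rows)
                for (int c = 0; c < 3; c++)
                    if (row[c] == null) nulls[c]++;
                    else maxLen[c] = Math.max(maxLen[c], row[c].length());
            for (int c = 0; c < 3; c++)
                System.out.printf("col %d: nulls=%d maxLen=%d%n", c, nulls[c], maxLen[c]);
        }
    }
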
aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -107,6 +177,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -118,6 +201,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column 
stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Stage: Stage-0 Move Operator @@ -130,7 +241,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -143,7 +259,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/spark/union19.q.out ql/src/test/results/clientpositive/spark/union19.q.out index fe5902f395..4f33979fe7 100644 --- ql/src/test/results/clientpositive/spark/union19.q.out +++ ql/src/test/results/clientpositive/spark/union19.q.out @@ -39,8 +39,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 4 (GROUP, 2), Reducer 2 (GROUP, 2) + Reducer 10 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 7 (GROUP, 2), Reducer 9 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 8 (GROUP, 1), Reducer 10 (GROUP, 1) + Reducer 9 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -59,7 +62,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 4 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -92,7 +95,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 2 + Map 8 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic 
stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 10 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -103,30 +132,23 @@ STAGE PLANS: expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator @@ -147,6 +169,82 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator 
Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-0 Move Operator @@ -159,7 +257,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-1 Move Operator @@ -172,7 +275,12 @@ STAGE PLANS: name: default.dest2 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/spark/union22.q.out ql/src/test/results/clientpositive/spark/union22.q.out index 6acaba8f70..0eaa132d20 100644 --- ql/src/test/results/clientpositive/spark/union22.q.out +++ ql/src/test/results/clientpositive/spark/union22.q.out @@ -81,7 +81,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: dst_union22_delta @@ -118,7 +118,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -162,6 +162,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2), 
Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -208,6 +210,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -219,7 +240,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -260,7 +281,7 @@ STAGE PLANS: name: default.dst_union22_delta Truncated Path -> Alias: /dst_union22_delta/ds=1 [null-subquery1:$hdt$_0-subquery1:dst_union22_delta] - Map 2 + Map 3 Map Operator Tree: TableScan alias: a @@ -287,7 +308,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col4, _col5 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 182 Data size: 4062 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -324,6 +345,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -337,7 +377,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -378,6 +418,41 @@ STAGE PLANS: name: default.dst_union22 Truncated Path -> Alias: /dst_union22/ds=1 [null-subquery2:$hdt$_0-subquery2:$hdt$_0:a] + Reducer 2 + Needs 
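
union22 writes into a partition, so its stats branch differs from the table-level cases above: the Group By Operator keys the compute_stats aggregates on ds, and the Stats Work stage is marked "Is Table Level Stats: false". Keying the rollup on the partition column amounts to a grouped merge, sketched here with hypothetical row data:

    import java.util.Map;
    import java.util.TreeMap;

    public class PartitionKeyedStats {
        public static void main(String[] args) {
            // (ds, k1) pairs standing in for rows routed to partitions
            String[][] rows = { {"2", "a"}, {"2", "b"}, {"2", null}, {"2", "c"} };
            Map<String, long[]> byPartition = new TreeMap<>(); // ds -> {rowCount, nullCount}
            for (String[] r : rows) {
                long[] s = byPartition.computeIfAbsent(r[0], k -> new long[2]);
                s[0]++;
                if (r[1] == null) s[1]++;
            }
            byPartition.forEach((ds, s) ->
                    System.out.println("ds=" + ds + ": rows=" + s[0] + ", nulls=" + s[1]));
        }
    }
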
Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -407,8 +482,14 @@ STAGE PLANS: name: default.dst_union22 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: k1, k2, k3, k4 + Column Types: string, string, string, string + Table: default.dst_union22 + Is Table Level Stats: false PREHOOK: query: insert overwrite table dst_union22 partition (ds='2') select * from diff --git ql/src/test/results/clientpositive/spark/union25.q.out ql/src/test/results/clientpositive/spark/union25.q.out index 559b318717..c5a001a441 100644 --- ql/src/test/results/clientpositive/spark/union25.q.out +++ ql/src/test/results/clientpositive/spark/union25.q.out @@ -189,5 +189,6 @@ STAGE PLANS: name: default.tmp_unionall Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: diff --git ql/src/test/results/clientpositive/spark/union28.q.out ql/src/test/results/clientpositive/spark/union28.q.out index 7ee06fef70..e87fa291e1 100644 --- ql/src/test/results/clientpositive/spark/union28.q.out +++ ql/src/test/results/clientpositive/spark/union28.q.out @@ -41,8 +41,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) - Reducer 5 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 4 (GROUP, 1), Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +66,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Map 2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 
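The rewritten plans above all follow one pattern: with hive.stats.column.autogather enabled, each INSERT gains a Select Operator feeding a compute_stats Group By plus a merging reducer, and the old Stats-Aggr Operator stage becomes a Stats Work stage carrying a Column Stats Desc. A minimal way to observe such a plan, reusing a table name from the tests above (an illustrative sketch, not part of the patch):

    set hive.stats.column.autogather=true;
    explain
    insert overwrite table tmp_unionall
    select key, value from src;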
diff --git ql/src/test/results/clientpositive/spark/union28.q.out ql/src/test/results/clientpositive/spark/union28.q.out
index 7ee06fef70..e87fa291e1 100644
--- ql/src/test/results/clientpositive/spark/union28.q.out
+++ ql/src/test/results/clientpositive/spark/union28.q.out
@@ -41,8 +41,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 3 <- Map 2 (GROUP, 2)
-        Reducer 5 <- Map 2 (GROUP, 2)
+        Reducer 2 <- Map 1 (GROUP, 1), Reducer 4 (GROUP, 1), Reducer 4 (GROUP, 1)
+        Reducer 4 <- Map 3 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -66,7 +66,20 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.union_subq_union
-        Map 2 
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -85,26 +98,21 @@ STAGE PLANS:
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3 
+        Reducer 2 
            Reduce Operator Tree:
              Group By Operator
-                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.union_subq_union
-        Reducer 5 
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
            Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
@@ -123,6 +131,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.union_subq_union
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-0
     Move Operator
@@ -135,7 +156,12 @@ STAGE PLANS:
           name: default.union_subq_union
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.union_subq_union
 PREHOOK: query: insert overwrite table union_subq_union
 select * from (
diff --git ql/src/test/results/clientpositive/spark/union29.q.out ql/src/test/results/clientpositive/spark/union29.q.out
index 05c44d1768..148463d795 100644
--- ql/src/test/results/clientpositive/spark/union29.q.out
+++ ql/src/test/results/clientpositive/spark/union29.q.out
@@ -40,6 +40,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -63,48 +65,33 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.union_subq_union
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string)
+                outputColumnNames: key, value
                 Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.union_subq_union
-        Map 3 
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.union_subq_union
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Move Operator
@@ -117,7 +104,12 @@ STAGE PLANS:
           name: default.union_subq_union
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.union_subq_union
 PREHOOK: query: insert overwrite table union_subq_union
 select * from (
@@ -163,17 +155,17 @@ POSTHOOK: Input: default@union_subq_union
 0	val_0
 0	val_0
 0	val_0
-0	val_0
-0	val_0
-0	val_0
-2	val_2
 2	val_2
 2	val_2
 4	val_4
 4	val_4
-4	val_4
 5	val_5
 5	val_5
 5	val_5
 5	val_5
 5	val_5
+5	val_5
+8	val_8
+8	val_8
+9	val_9
+9	val_9
diff --git ql/src/test/results/clientpositive/spark/union30.q.out ql/src/test/results/clientpositive/spark/union30.q.out
index 9d827eb814..4fe054770b 100644
--- ql/src/test/results/clientpositive/spark/union30.q.out
+++ ql/src/test/results/clientpositive/spark/union30.q.out
@@ -55,8 +55,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 4 <- Map 3 (GROUP, 2)
-        Reducer 6 <- Map 3 (GROUP, 2)
+        Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 5 (GROUP, 1)
+        Reducer 5 <- Map 4 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -80,28 +80,20 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.union_subq_union
-        Map 2 
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string)
+                outputColumnNames: key, value
                 Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.union_subq_union
-        Map 3 
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -120,26 +112,21 @@ STAGE PLANS:
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Reducer 4 
+        Reducer 2 
            Reduce Operator Tree:
              Group By Operator
-                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: default.union_subq_union
-        Reducer 6 
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
            Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
@@ -158,6 +145,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.union_subq_union
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-0
     Move Operator
@@ -170,7 +170,12 @@ STAGE PLANS:
           name: default.union_subq_union
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.union_subq_union
 PREHOOK: query: insert overwrite table union_subq_union
 select * from (
diff --git ql/src/test/results/clientpositive/spark/union31.q.out ql/src/test/results/clientpositive/spark/union31.q.out
index 10f8bdb242..2da8cefa96 100644
--- ql/src/test/results/clientpositive/spark/union31.q.out
+++ ql/src/test/results/clientpositive/spark/union31.q.out
@@ -79,26 +79,28 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 5 (GROUP, 2), Map 7 (GROUP, 2)
-        Reducer 3 <- Map 6 (GROUP, 2), Map 8 (GROUP, 2)
+        Reducer 2 <- Map 7 (GROUP, 2), Map 9 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 4 <- Map 10 (GROUP, 2), Map 8 (GROUP, 2)
+        Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 5 
+        Map 10 
             Map Operator Tree:
                 TableScan
-                  alias: t1
+                  alias: t2
                   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: key (type: string), value (type: string)
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: _col0 (type: string)
-                      outputColumnNames: _col0
+                      expressions: _col1 (type: string)
+                      outputColumnNames: _col1
                       Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count(1)
-                        keys: _col0 (type: string)
+                        keys: _col1 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1
                         Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
@@ -108,7 +110,7 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: bigint)
-        Map 6 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: t1
@@ -118,12 +120,12 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: _col1 (type: string)
-                      outputColumnNames: _col1
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count(1)
-                        keys: _col1 (type: string)
+                        keys: _col0 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1
                         Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
@@ -133,22 +135,22 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: bigint)
-        Map 7 
+        Map 8 
            Map Operator Tree:
                TableScan
-                  alias: t2
+                  alias: t1
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: key (type: string), value (type: string)
                    outputColumnNames: _col0, _col1
                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
-                      expressions: _col0 (type: string)
-                      outputColumnNames: _col0
+                      expressions: _col1 (type: string)
+                      outputColumnNames: _col1
                      Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        aggregations: count(1)
-                        keys: _col0 (type: string)
+                        keys: _col1 (type: string)
                        mode: hash
                        outputColumnNames: _col0, _col1
                        Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
@@ -158,7 +160,7 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: bigint)
-        Map 8 
+        Map 9 
            Map Operator Tree:
                TableScan
                  alias: t2
@@ -168,12 +170,12 @@ STAGE PLANS:
                    outputColumnNames: _col0, _col1
                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
-                      expressions: _col1 (type: string)
-                      outputColumnNames: _col1
+                      expressions: _col0 (type: string)
+                      outputColumnNames: _col0
                      Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        aggregations: count(1)
-                        keys: _col1 (type: string)
+                        keys: _col0 (type: string)
                        mode: hash
                        outputColumnNames: _col0, _col1
                        Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE
@@ -203,9 +205,36 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.t3
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: int)
+                  outputColumnNames: key, cnt
+                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 3 
            Reduce Operator Tree:
              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: string)
                mode: mergepartial
@@ -223,6 +252,33 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.t4
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: int)
+                  outputColumnNames: value, cnt
+                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(value, 'hll'), compute_stats(cnt, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Move Operator
@@ -235,7 +291,12 @@ STAGE PLANS:
           name: default.t3
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: string, int
+          Table: default.t3
   Stage: Stage-1
     Move Operator
@@ -248,7 +309,12 @@ STAGE PLANS:
           name: default.t4
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: value, cnt
+          Column Types: string, int
+          Table: default.t4
 PREHOOK: query: from (select * from t1
                      union all
@@ -362,8 +428,11 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 2)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 5 (GROUP PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Map 4 (GROUP, 2)
+        Reducer 4 <- Reducer 8 (GROUP, 1)
+        Reducer 5 <- Reducer 9 (GROUP, 1)
+        Reducer 7 <- Map 6 (GROUP, 2)
+        Reducer 8 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 7 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 9 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 7 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -387,7 +456,7 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: bigint)
-        Map 4 
+        Map 6 
            Map Operator Tree:
                TableScan
                  alias: t2
@@ -422,7 +491,49 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: bigint)
-        Reducer 3 
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 7 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+        Reducer 8 
            Reduce Operator Tree:
              Forward
                Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
@@ -444,6 +555,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.t5
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: int)
+                    outputColumnNames: c1, cnt
+                    Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                Group By Operator
                  aggregations: sum(VALUE._col0)
                  keys: KEY._col0 (type: string)
@@ -462,20 +586,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.t6
-        Reducer 5 
+        Reducer 9 
            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: bigint)
+              Forward
+                Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(VALUE._col0)
+                  keys: KEY._col0 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: int)
+                      outputColumnNames: c1, cnt
+                      Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll')
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-0
     Move Operator
@@ -488,7 +625,12 @@ STAGE PLANS:
           name: default.t5
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t5
   Stage: Stage-1
     Move Operator
@@ -501,7 +643,12 @@ STAGE PLANS:
           name: default.t6
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t6
 PREHOOK: query: from
 (
@@ -657,7 +804,10 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 2)
-        Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2), Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Reducer 7 (GROUP, 1)
+        Reducer 5 <- Reducer 8 (GROUP, 1)
+        Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 8 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -681,7 +831,7 @@ STAGE PLANS:
                           Map-reduce partition columns: _col0 (type: string)
                           Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: bigint)
-        Map 4 
+        Map 6 
            Map Operator Tree:
                TableScan
                  alias: t2
@@ -720,7 +870,35 @@ STAGE PLANS:
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
                        Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3 
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 7 
            Reduce Operator Tree:
              Forward
                Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE
@@ -742,6 +920,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.t7
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: int)
+                    outputColumnNames: c1, cnt
+                    Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll')
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                Group By Operator
                  aggregations: count(1)
                  keys: KEY._col0 (type: string)
@@ -760,6 +951,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.t8
+        Reducer 8 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(1)
+                  keys: KEY._col0 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: int)
+                      outputColumnNames: c1, cnt
+                      Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll')
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-0
     Move Operator
@@ -772,7 +990,12 @@ STAGE PLANS:
           name: default.t7
   Stage: Stage-3
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t7
   Stage: Stage-1
     Move Operator
@@ -785,7 +1008,12 @@ STAGE PLANS:
           name: default.t8
   Stage: Stage-4
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t8
 PREHOOK: query: from
 (
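In the multi-insert plans above, every destination table gets its own compute_stats pipeline and its own Stats Work stage (t3/t4, t5/t6, t7/t8), which is why each edge list grows by one stats reducer per target. The queries being explained have the classic multi-insert shape; a sketch along the lines of the tests (not the exact .q text):

    from (select * from t1 union all select * from t2) u
    insert overwrite table t3 select u.key, count(1) as cnt group by u.key
    insert overwrite table t4 select u.value, count(1) as cnt group by u.value;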
diff --git ql/src/test/results/clientpositive/spark/union33.q.out ql/src/test/results/clientpositive/spark/union33.q.out
index def5f69305..f98bbde739 100644
--- ql/src/test/results/clientpositive/spark/union33.q.out
+++ ql/src/test/results/clientpositive/spark/union33.q.out
@@ -33,8 +33,9 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Reducer 3 (GROUP, 2)
+        Reducer 2 <- Map 1 (GROUP, 1), Reducer 5 (GROUP, 1)
+        Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -57,7 +58,20 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_src
-        Map 2 
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 3 
            Map Operator Tree:
                TableScan
                  alias: src
@@ -78,7 +92,21 @@ STAGE PLANS:
                        Map-reduce partition columns: rand() (type: double)
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: bigint)
-        Reducer 3 
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: final
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -92,7 +120,7 @@ STAGE PLANS:
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint)
-        Reducer 4 
+        Reducer 5 
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -112,6 +140,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_src
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-0
     Move Operator
@@ -124,7 +165,12 @@ STAGE PLANS:
           name: default.test_src
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_src
 PREHOOK: query: INSERT OVERWRITE TABLE test_src 
 SELECT key, value FROM (
@@ -188,6 +234,7 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
         Reducer 3 <- Reducer 2 (GROUP, 2)
+        Reducer 4 <- Map 5 (GROUP, 1), Reducer 3 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -211,7 +258,7 @@ STAGE PLANS:
                        Map-reduce partition columns: rand() (type: double)
                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col1 (type: bigint)
-        Map 4 
+        Map 5 
            Map Operator Tree:
                TableScan
                  alias: src
@@ -231,6 +278,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_src
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
        Reducer 2 
            Reduce Operator Tree:
              Group By Operator
@@ -265,6 +325,33 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_src
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: final
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Move Operator
@@ -277,7 +364,12 @@ STAGE PLANS:
           name: default.test_src
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_src
 PREHOOK: query: INSERT OVERWRITE TABLE test_src 
 SELECT key, value FROM (
diff --git ql/src/test/results/clientpositive/spark/union4.q.out ql/src/test/results/clientpositive/spark/union4.q.out
index cb8c6a2cb8..6833af251a 100644
--- ql/src/test/results/clientpositive/spark/union4.q.out
+++ ql/src/test/results/clientpositive/spark/union4.q.out
@@ -28,7 +28,8 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 4 <- Map 3 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1)
+        Reducer 5 <- Map 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -47,7 +48,7 @@ STAGE PLANS:
                        sort order: 
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -86,7 +87,34 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.tmptable
-        Reducer 4 
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: int)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -109,6 +137,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.tmptable
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: int)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
   Stage: Stage-0
     Move Operator
@@ -121,7 +162,12 @@ STAGE PLANS:
           name: default.tmptable
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, int
+          Table: default.tmptable
 PREHOOK: query: insert overwrite table tmptable
   select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
diff --git ql/src/test/results/clientpositive/spark/union6.q.out ql/src/test/results/clientpositive/spark/union6.q.out
index 6f61839c6e..ec32728577 100644
--- ql/src/test/results/clientpositive/spark/union6.q.out
+++ ql/src/test/results/clientpositive/spark/union6.q.out
@@ -28,6 +28,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 3 <- Map 4 (GROUP, 1), Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -46,7 +47,7 @@ STAGE PLANS:
                        sort order: 
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -63,6 +64,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.tmptable
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL
+                Group By Operator
+                  aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
        Reducer 2 
            Reduce Operator Tree:
              Group By Operator
@@ -82,6 +96,33 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.tmptable
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Move Operator
@@ -94,7 +135,12 @@ STAGE PLANS:
           name: default.tmptable
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.tmptable
 PREHOOK: query: insert overwrite table tmptable
   select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
diff --git ql/src/test/results/clientpositive/spark/union_lateralview.q.out ql/src/test/results/clientpositive/spark/union_lateralview.q.out
index fe9afb8f88..cf5cf7cc83 100644
--- ql/src/test/results/clientpositive/spark/union_lateralview.q.out
+++ ql/src/test/results/clientpositive/spark/union_lateralview.q.out
@@ -53,7 +53,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -104,7 +105,7 @@ STAGE PLANS:
                        Map-reduce partition columns: _col1 (type: string)
                        Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: int), _col2 (type: string)
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: srcpart
@@ -152,7 +153,7 @@ STAGE PLANS:
                        Map-reduce partition columns: _col1 (type: string)
                        Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
                        value expressions: _col0 (type: int), _col2 (type: string)
-        Map 4 
+        Map 5 
            Map Operator Tree:
                TableScan
                  alias: b
@@ -184,6 +185,33 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.test_union_lateral_view
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+                  outputColumnNames: key, arr_ele, value
+                  Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(arr_ele, 'hll'), compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Move Operator
@@ -196,7 +224,12 @@ STAGE PLANS:
           name: default.test_union_lateral_view
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, arr_ele, value
+          Column Types: int, int, string
+          Table: default.test_union_lateral_view
 PREHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view
 SELECT b.key, d.arr_ele, d.value
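The Stats Work / Column Stats Desc stage persists the same metastore column statistics that an explicit ANALYZE would, so after one of the inserts above the autogathered stats can be inspected or re-gathered from the client (illustrative commands, assuming the test tables exist):

    analyze table test_union_lateral_view compute statistics for columns;
    describe formatted test_union_lateral_view key;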
+ value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -676,7 +702,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -697,6 +736,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -709,7 +761,12 @@ STAGE PLANS: name: default.union_top Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top PREHOOK: query: insert into table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a @@ -774,8 +831,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -796,9 +854,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -816,9 +873,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN 
Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -836,7 +892,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: @@ -859,7 +914,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -880,7 +962,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -901,6 +996,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -913,7 
+1021,12 @@ STAGE PLANS: name: default.union_top Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top PREHOOK: query: insert overwrite table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a diff --git ql/src/test/results/clientpositive/spark/vector_char_4.q.out ql/src/test/results/clientpositive/spark/vector_char_4.q.out index 943a4b1423..eef28b592c 100644 --- ql/src/test/results/clientpositive/spark/vector_char_4.q.out +++ ql/src/test/results/clientpositive/spark/vector_char_4.q.out @@ -139,6 +139,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -146,38 +148,56 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: char(20)), _col3 (type: char(30)), _col4 (type: char(20)), _col5 (type: char(20)), _col6 (type: char(50)) + outputColumnNames: ct, csi, ci, cb, cf, cd, cs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ct, 'hll'), compute_stats(csi, 'hll'), compute_stats(ci, 'hll'), compute_stats(cb, 'hll'), compute_stats(cf, 'hll'), compute_stats(cd, 'hll'), compute_stats(cs, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true 
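Note: every INSERT hunk in this patch follows the same pattern. With hive.stats.column.autogather enabled, the plan gains a Select Operator that maps the sink's output columns back to their table column names, a hash-mode Group By over the compute_stats(col, 'hll') UDAF, and a single-reducer mergepartial Group By that writes the aggregated stats, while the former Stats-Aggr Operator stage becomes a Stats Work stage carrying both Basic Stats Work and a Column Stats Desc. A minimal sketch to reproduce the new plan shape, assuming the union_top and src tables from the surrounding test:

    SET hive.stats.column.autogather=true;
    EXPLAIN
    INSERT OVERWRITE TABLE union_top
    SELECT * FROM (SELECT key, 0 AS value FROM src WHERE key % 3 == 0 LIMIT 3) a;

The vectorization hunks here show the cost of the extra branch: the vectorizer does not support the compute_stats UDAF, so map and reduce work that previously ran vectorized now reports a notVectorizedReason and falls back to row mode.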
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -190,5 +210,10 @@ STAGE PLANS: name: default.char_lazy_binary_columnar Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: ct, csi, ci, cb, cf, cd, cs + Column Types: char(10), char(10), char(20), char(30), char(20), char(20), char(50) + Table: default.char_lazy_binary_columnar diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index a93c8332bd..101c751741 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -186,10 +186,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index bb922e7f2a..c11406d47a 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here 
#### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out index dbbfd34d37..f8d1ec2425 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join3.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out index ffce9e6671..a55250b9f4 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join4.q.out @@ -201,10 +201,12 @@ POSTHOOK: Output: default@small_alltypesorc_b PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b +PREHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b +POSTHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_b PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out index 4f25253ef7..680ee42bb6 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join5.q.out @@ -27,10 +27,12 @@ POSTHOOK: Output: default@sorted_mod_4 PREHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@sorted_mod_4 +PREHOOK: Output: default@sorted_mod_4 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE sorted_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@sorted_mod_4 +POSTHOOK: Output: default@sorted_mod_4 #### A masked pattern was here #### PREHOOK: query: create table small_table stored as orc as select ctinyint, cbigint from alltypesorc limit 100 @@ -57,10 +59,12 @@ POSTHOOK: Output: default@small_table PREHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table +PREHOOK: Output: default@small_table #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table +POSTHOOK: Output: default@small_table #### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* @@ -267,10 +271,12 @@ POSTHOOK: Output: default@mod_8_mod_4 PREHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: 
default@mod_8_mod_4 +PREHOOK: Output: default@mod_8_mod_4 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE mod_8_mod_4 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@mod_8_mod_4 +POSTHOOK: Output: default@mod_8_mod_4 #### A masked pattern was here #### PREHOOK: query: create table small_table2 stored as orc as select pmod(ctinyint, 16) as cmodtinyint, cbigint from alltypesorc limit 100 @@ -297,10 +303,12 @@ POSTHOOK: Output: default@small_table2 PREHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_table2 +PREHOOK: Output: default@small_table2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_table2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_table2 +POSTHOOK: Output: default@small_table2 #### A masked pattern was here #### PREHOOK: query: explain vectorization detail formatted select count(*) from (select s.*, st.* diff --git ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out index 1c8e479512..9d58c5198c 100644 --- ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out +++ ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out @@ -139,6 +139,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -146,38 +148,56 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: varchar(20)), _col3 (type: varchar(30)), _col4 (type: varchar(20)), _col5 (type: varchar(20)), _col6 (type: varchar(50)) + outputColumnNames: vt, vsi, vi, vb, vf, vd, vs + 
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(vt, 'hll'), compute_stats(vsi, 'hll'), compute_stats(vi, 'hll'), compute_stats(vb, 'hll'), compute_stats(vf, 'hll'), compute_stats(vd, 'hll'), compute_stats(vs, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -190,5 +210,10 @@ STAGE PLANS: name: default.varchar_lazy_binary_columnar Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: vt, vsi, vi, vb, vf, vd, vs + Column Types: varchar(10), varchar(10), varchar(20), varchar(30), varchar(20), varchar(20), varchar(50) + Table: default.varchar_lazy_binary_columnar diff --git ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out index 8cb04c7986..cb53f861b5 100644 --- ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out +++ ql/src/test/results/clientpositive/special_character_in_tabnames_2.q.out @@ -29,10 +29,12 @@ POSTHOOK: Output: default@s/c PREHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS FOR COLUMNS key,value PREHOOK: type: QUERY PREHOOK: Input: default@s/c +PREHOOK: Output: default@s/c #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE `s/c` COMPUTE STATISTICS FOR COLUMNS key,value POSTHOOK: type: QUERY POSTHOOK: Input: default@s/c +POSTHOOK: Output: default@s/c #### A masked pattern was here #### PREHOOK: query: SELECT key, value FROM `s/c` WHERE key > 80 AND key < 100 PREHOOK: type: QUERY diff --git 
ql/src/test/results/clientpositive/stats0.q.out ql/src/test/results/clientpositive/stats0.q.out index 0476acbed1..0569507676 100644 --- ql/src/test/results/clientpositive/stats0.q.out +++ ql/src/test/results/clientpositive/stats0.q.out @@ -63,6 +63,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -114,6 +130,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -144,8 +189,14 @@ STAGE PLANS: name: default.stats_non_partitioned Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true PREHOOK: query: insert overwrite table stats_non_partitioned select * from src @@ -717,6 +768,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -731,7 +816,12 @@ STAGE PLANS: name: default.stats_partitioned Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1') select * from src @@ -1380,6 +1470,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1431,6 +1537,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1470,8 +1605,14 @@ STAGE PLANS: name: default.stats_non_partitioned Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + Stats Work + Basic Stats Work: +#### A masked pattern was here #### + Column Stats Desc: + Columns: key, value + Column Types: 
string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true Stage: Stage-3 Map Reduce @@ -2228,6 +2369,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -2251,7 +2426,12 @@ STAGE PLANS: name: default.stats_partitioned Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/stats1.q.out ql/src/test/results/clientpositive/stats1.q.out index 5c6049b2b9..afaa27bcc7 100644 --- ql/src/test/results/clientpositive/stats1.q.out +++ ql/src/test/results/clientpositive/stats1.q.out @@ -75,6 +75,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -92,6 +105,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -104,7 +143,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable PREHOOK: query: INSERT OVERWRITE TABLE tmptable SELECT unionsrc.key, unionsrc.value @@ -178,7 +222,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 26 rawDataSize 199 diff --git ql/src/test/results/clientpositive/stats10.q.out ql/src/test/results/clientpositive/stats10.q.out index d1fe47393b..ed9a10abd5 100644 --- ql/src/test/results/clientpositive/stats10.q.out +++ ql/src/test/results/clientpositive/stats10.q.out @@ -17,7 +17,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,6 +49,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket3_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -62,7 +79,41 @@ STAGE PLANS: name: default.bucket3_1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + + 
Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src @@ -374,7 +425,8 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table bucket3_1 partition (ds) compute statistics PREHOOK: type: QUERY @@ -414,7 +466,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -453,7 +505,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats12.q.out ql/src/test/results/clientpositive/stats12.q.out index cebdf67d5c..9dbcf43ebf 100644 --- ql/src/test/results/clientpositive/stats12.q.out +++ ql/src/test/results/clientpositive/stats12.q.out @@ -150,8 +150,9 @@ STAGE PLANS: /analyze_srcpart/ds=2008-04-08/hr=12 [analyze_srcpart] Stage: Stage-1 - Stats-Aggr Operator - Stats Aggregation Key Prefix: default.analyze_srcpart/ + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.analyze_srcpart/ PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/stats13.q.out ql/src/test/results/clientpositive/stats13.q.out index 227dfb5478..6403d0616c 100644 --- ql/src/test/results/clientpositive/stats13.q.out +++ ql/src/test/results/clientpositive/stats13.q.out @@ -104,8 +104,9 @@ STAGE PLANS: /analyze_srcpart/ds=2008-04-08/hr=11 [analyze_srcpart] Stage: Stage-1 - Stats-Aggr Operator - Stats Aggregation Key Prefix: default.analyze_srcpart/ + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.analyze_srcpart/ PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/stats14.q.out 
ql/src/test/results/clientpositive/stats14.q.out index 85017462c3..1cd660cd67 100644 --- ql/src/test/results/clientpositive/stats14.q.out +++ ql/src/test/results/clientpositive/stats14.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats15.q.out ql/src/test/results/clientpositive/stats15.q.out index 85017462c3..1cd660cd67 100644 --- ql/src/test/results/clientpositive/stats15.q.out +++ ql/src/test/results/clientpositive/stats15.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats18.q.out ql/src/test/results/clientpositive/stats18.q.out index 4945808098..de8918a40a 100644 --- ql/src/test/results/clientpositive/stats18.q.out +++ ql/src/test/results/clientpositive/stats18.q.out @@ -39,7 +39,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats2.q.out ql/src/test/results/clientpositive/stats2.q.out index 29a659f83c..6442235ed9 100644 --- ql/src/test/results/clientpositive/stats2.q.out +++ ql/src/test/results/clientpositive/stats2.q.out @@ -134,7 +134,8 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 23248 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_t1 partition (ds, hr) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/stats3.q.out ql/src/test/results/clientpositive/stats3.q.out index 2f76d0e21d..76f3fb22f3 100644 --- 
ql/src/test/results/clientpositive/stats3.q.out +++ ql/src/test/results/clientpositive/stats3.q.out @@ -54,7 +54,8 @@ STAGE PLANS: name: default.hive_test_src Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: load data local inpath '../../data/files/test.dat' overwrite into table hive_test_src PREHOOK: type: LOAD diff --git ql/src/test/results/clientpositive/stats4.q.out ql/src/test/results/clientpositive/stats4.q.out index 8f503a90b8..cf9e522b2f 100644 --- ql/src/test/results/clientpositive/stats4.q.out +++ ql/src/test/results/clientpositive/stats4.q.out @@ -47,17 +47,13 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1, Stage-10 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -81,6 +77,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -96,6 +108,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -120,7 +166,12 @@ STAGE PLANS: name: default.nzhang_part1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 Stage: Stage-4 Map Reduce @@ -152,15 +203,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -175,37 +217,41 @@ STAGE PLANS: name: default.nzhang_part2 Stage: Stage-9 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 Stage: Stage-10 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was 
here #### PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' @@ -2305,7 +2351,7 @@ Database: default Table: nzhang_part1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2345,7 +2391,7 @@ Database: default Table: nzhang_part1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2385,7 +2431,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2425,7 +2471,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/stats5.q.out ql/src/test/results/clientpositive/stats5.q.out index 74ddadba04..a1581855f8 100644 --- ql/src/test/results/clientpositive/stats5.q.out +++ ql/src/test/results/clientpositive/stats5.q.out @@ -27,7 +27,8 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_src compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/stats7.q.out ql/src/test/results/clientpositive/stats7.q.out index 4d30413b7f..e14b6b79fe 100644 --- ql/src/test/results/clientpositive/stats7.q.out +++ ql/src/test/results/clientpositive/stats7.q.out @@ -50,7 +50,8 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 23248 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/stats8.q.out ql/src/test/results/clientpositive/stats8.q.out index ea5f1d0a5c..cfae7601af 100644 --- ql/src/test/results/clientpositive/stats8.q.out +++ ql/src/test/results/clientpositive/stats8.q.out @@ -50,7 +50,8 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 23248 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics PREHOOK: type: QUERY @@ -162,7 +163,8 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=12) compute statistics PREHOOK: type: QUERY @@ -233,7 +235,8 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart 
PARTITION(ds='2008-04-09',hr=11) compute statistics PREHOOK: type: QUERY @@ -304,7 +307,8 @@ STAGE PLANS: Statistics: Num rows: 1500 Data size: 15936 Basic stats: PARTIAL Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-09',hr=12) compute statistics PREHOOK: type: QUERY @@ -375,7 +379,8 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds, hr) compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/stats9.q.out ql/src/test/results/clientpositive/stats9.q.out index a073b8bfcb..226693360b 100644 --- ql/src/test/results/clientpositive/stats9.q.out +++ ql/src/test/results/clientpositive/stats9.q.out @@ -33,7 +33,8 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 11603 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcbucket compute statistics PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out index b855b3896e..185df2d0c7 100644 --- ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out +++ ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out @@ -42,6 +42,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll') + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -65,7 +99,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key + Column Types: string + Table: default.tmptable Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/stats_empty_partition.q.out 
ql/src/test/results/clientpositive/stats_empty_partition.q.out index 289d17a637..4cd1a1b9c6 100644 --- ql/src/test/results/clientpositive/stats_empty_partition.q.out +++ ql/src/test/results/clientpositive/stats_empty_partition.q.out @@ -38,7 +38,7 @@ Database: default Table: tmptable #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 0 rawDataSize 0 diff --git ql/src/test/results/clientpositive/stats_invalidation.q.out ql/src/test/results/clientpositive/stats_invalidation.q.out index a0e76631a3..a98d98ecfd 100644 --- ql/src/test/results/clientpositive/stats_invalidation.q.out +++ ql/src/test/results/clientpositive/stats_invalidation.q.out @@ -21,10 +21,12 @@ POSTHOOK: Lineage: stats_invalid.value SIMPLE [(src)src.FieldSchema(name:value, PREHOOK: query: analyze table stats_invalid compute statistics for columns key,value PREHOOK: type: QUERY PREHOOK: Input: default@stats_invalid +PREHOOK: Output: default@stats_invalid #### A masked pattern was here #### POSTHOOK: query: analyze table stats_invalid compute statistics for columns key,value POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_invalid +POSTHOOK: Output: default@stats_invalid #### A masked pattern was here #### PREHOOK: query: desc formatted stats_invalid PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/stats_list_bucket.q.out ql/src/test/results/clientpositive/stats_list_bucket.q.out index 0c43b1bb89..e7b7b26235 100644 --- ql/src/test/results/clientpositive/stats_list_bucket.q.out +++ ql/src/test/results/clientpositive/stats_list_bucket.q.out @@ -61,7 +61,7 @@ Database: default Table: stats_list_bucket #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 @@ -133,7 +133,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git ql/src/test/results/clientpositive/stats_missing_warning.q.out ql/src/test/results/clientpositive/stats_missing_warning.q.out index 0ed70a0960..b90578597b 100644 --- ql/src/test/results/clientpositive/stats_missing_warning.q.out +++ ql/src/test/results/clientpositive/stats_missing_warning.q.out @@ -117,26 +117,32 @@ POSTHOOK: Input: default@missing_stats_t3 PREHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@missing_stats_t1 +PREHOOK: Output: default@missing_stats_t1 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE missing_stats_t1 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@missing_stats_t1 +POSTHOOK: Output: default@missing_stats_t1 #### A masked pattern was here #### PREHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@missing_stats_t2 +PREHOOK: Output: default@missing_stats_t2 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE missing_stats_t2 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@missing_stats_t2 +POSTHOOK: Output: 
default@missing_stats_t2 #### A masked pattern was here #### PREHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@missing_stats_t3 +PREHOOK: Output: default@missing_stats_t3 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE missing_stats_t3 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@missing_stats_t3 +POSTHOOK: Output: default@missing_stats_t3 #### A masked pattern was here #### PREHOOK: query: SELECT COUNT(*) FROM missing_stats_t1 t1 diff --git ql/src/test/results/clientpositive/stats_noscan_1.q.out ql/src/test/results/clientpositive/stats_noscan_1.q.out index ad2ca94baa..a36a0b9a20 100644 --- ql/src/test/results/clientpositive/stats_noscan_1.q.out +++ ql/src/test/results/clientpositive/stats_noscan_1.q.out @@ -44,7 +44,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart PARTITION(ds='2008-04-08',hr=11) compute statistics noscan PREHOOK: type: QUERY @@ -315,7 +316,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart_partial PARTITION(ds='2008-04-08') compute statistics noscan PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/stats_only_null.q.out ql/src/test/results/clientpositive/stats_only_null.q.out index e7068d3396..8b93d5f2da 100644 --- ql/src/test/results/clientpositive/stats_only_null.q.out +++ ql/src/test/results/clientpositive/stats_only_null.q.out @@ -73,46 +73,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 2000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -123,76 +89,52 @@ POSTHOOK: query: explain select count(*), 
count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 2120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 2120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink PREHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d PREHOOK: type: QUERY PREHOOK: Input: default@stats_null +PREHOOK: Output: default@stats_null #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null compute statistics for columns a,b,c,d POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null +POSTHOOK: Output: default@stats_null #### A masked pattern was here #### PREHOOK: query: analyze table stats_null_part partition(dt='2010') compute statistics for columns a,b,c,d PREHOOK: type: QUERY PREHOOK: Input: default@stats_null_part PREHOOK: Input: default@stats_null_part@dt=2010 +PREHOOK: Output: default@stats_null_part +PREHOOK: Output: default@stats_null_part@dt=2010 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null_part partition(dt='2010') compute statistics for columns a,b,c,d POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null_part POSTHOOK: Input: default@stats_null_part@dt=2010 +POSTHOOK: Output: default@stats_null_part +POSTHOOK: Output: default@stats_null_part@dt=2010 #### A masked pattern was here #### PREHOOK: query: analyze table stats_null_part partition(dt='2011') compute statistics for columns a,b,c,d PREHOOK: type: QUERY PREHOOK: Input: default@stats_null_part PREHOOK: Input: default@stats_null_part@dt=2011 +PREHOOK: Output: default@stats_null_part +PREHOOK: Output: default@stats_null_part@dt=2011 #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null_part partition(dt='2011') compute statistics for columns a,b,c,d POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null_part POSTHOOK: Input: default@stats_null_part@dt=2011 +POSTHOOK: Output: default@stats_null_part +POSTHOOK: Output: 
default@stats_null_part@dt=2011 #### A masked pattern was here #### PREHOOK: query: describe formatted stats_null_part partition (dt='2010') PREHOOK: type: DESCTABLE @@ -364,12 +306,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@stats_null_part PREHOOK: Input: default@stats_null_part@dt=1 PREHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +PREHOOK: Output: default@stats_null_part +PREHOOK: Output: default@stats_null_part@dt=1 +PREHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### POSTHOOK: query: analyze table stats_null_part compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@stats_null_part POSTHOOK: Input: default@stats_null_part@dt=1 POSTHOOK: Input: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Output: default@stats_null_part +POSTHOOK: Output: default@stats_null_part@dt=1 +POSTHOOK: Output: default@stats_null_part@dt=__HIVE_DEFAULT_PARTITION__ #### A masked pattern was here #### PREHOOK: query: describe formatted stats_null_part partition(dt = 1) a PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/stats_partial_size.q.out ql/src/test/results/clientpositive/stats_partial_size.q.out index 4a4061db7e..2838d93dda 100644 --- ql/src/test/results/clientpositive/stats_partial_size.q.out +++ ql/src/test/results/clientpositive/stats_partial_size.q.out @@ -28,10 +28,12 @@ POSTHOOK: Output: default@sample PREHOOK: query: analyze table sample compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@sample +PREHOOK: Output: default@sample #### A masked pattern was here #### POSTHOOK: query: analyze table sample compute statistics for columns POSTHOOK: type: QUERY POSTHOOK: Input: default@sample +POSTHOOK: Output: default@sample #### A masked pattern was here #### PREHOOK: query: explain select sample_partitioned.x from sample_partitioned, sample where sample.y = sample_partitioned.y PREHOOK: type: QUERY @@ -47,16 +49,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sample_partitioned - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: x (type: int), y (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) TableScan alias: sample @@ -81,10 +83,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/stats_partscan_1_23.q.out 
ql/src/test/results/clientpositive/stats_partscan_1_23.q.out index cf9867d5b7..34f70235cb 100644 --- ql/src/test/results/clientpositive/stats_partscan_1_23.q.out +++ ql/src/test/results/clientpositive/stats_partscan_1_23.q.out @@ -89,7 +89,8 @@ STAGE PLANS: Partial Scan Statistics Stage: Stage-1 - Stats-Aggr Operator + Stats Work + Basic Stats Work: PREHOOK: query: analyze table analyze_srcpart_partial_scan PARTITION(ds='2008-04-08',hr=11) compute statistics partialscan PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/stats_ppr_all.q.out ql/src/test/results/clientpositive/stats_ppr_all.q.out index 3b9bb30ffa..f19e3f5c2c 100644 --- ql/src/test/results/clientpositive/stats_ppr_all.q.out +++ ql/src/test/results/clientpositive/stats_ppr_all.q.out @@ -46,6 +46,10 @@ PREHOOK: Input: default@ss PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=1 PREHOOK: Input: default@ss@country=US/year=2015/month=1/day=2 PREHOOK: Input: default@ss@country=US/year=2015/month=2/day=1 +PREHOOK: Output: default@ss +PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=1 +PREHOOK: Output: default@ss@country=US/year=2015/month=1/day=2 +PREHOOK: Output: default@ss@country=US/year=2015/month=2/day=1 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE ss PARTITION(country,year,month,day) compute statistics for columns POSTHOOK: type: QUERY @@ -53,6 +57,10 @@ POSTHOOK: Input: default@ss POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=1 POSTHOOK: Input: default@ss@country=US/year=2015/month=1/day=2 POSTHOOK: Input: default@ss@country=US/year=2015/month=2/day=1 +POSTHOOK: Output: default@ss +POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=1 +POSTHOOK: Output: default@ss@country=US/year=2015/month=1/day=2 +POSTHOOK: Output: default@ss@country=US/year=2015/month=2/day=1 #### A masked pattern was here #### PREHOOK: query: explain select sum(order_amount) from ss where (country="US" and year=2015 and month=2 and day=1) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/statsfs.q.out ql/src/test/results/clientpositive/statsfs.q.out index d070e9aa6f..16da73418f 100644 --- ql/src/test/results/clientpositive/statsfs.q.out +++ ql/src/test/results/clientpositive/statsfs.q.out @@ -176,7 +176,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -215,7 +215,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -343,7 +343,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -426,7 +426,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -466,7 +466,7 @@ Database: default Table: t1 #### A masked pattern 
was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out index 28c82b85d2..f1dc81d0f4 100644 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -58,18 +58,19 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 + Stage-11 is a root stage + Stage-2 depends on stages: Stage-11 Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1, Stage-8 Stage-6 depends on stages: Stage-2 Stage-0 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-0, Stage-8 + Stage-8 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -221,6 +222,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -233,7 +254,12 @@ STAGE PLANS: name: default.src_5 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 Stage: Stage-6 Map Reduce @@ -281,6 +307,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -293,7 +334,34 @@ STAGE 
PLANS: name: default.src_4 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b @@ -332,15 +400,16 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED +RUN: Stage-11:MAPRED RUN: Stage-2:MAPRED RUN: Stage-3:MAPRED RUN: Stage-6:MAPRED RUN: Stage-4:MAPRED RUN: Stage-0:MOVE +RUN: Stage-8:MAPRED RUN: Stage-1:MOVE -RUN: Stage-7:STATS -RUN: Stage-5:STATS +RUN: Stage-7:COLUMNSTATS +RUN: Stage-5:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -487,7 +556,7 @@ POSTHOOK: Input: default@src_5 199 val_199 199 val_199 2 val_2 -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain from src b @@ -518,22 +587,23 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2 - Stage-17 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-17 - Stage-15 depends on stages: Stage-2, Stage-13 - Stage-4 depends on stages: Stage-15 + Stage-11 is a root stage + Stage-15 depends on stages: Stage-11 , consists of Stage-18, Stage-2 + Stage-18 has a backup stage: Stage-2 + Stage-14 depends on stages: Stage-18 + Stage-16 depends on stages: Stage-2, Stage-14 + Stage-4 depends on stages: Stage-16 Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-16 depends on stages: Stage-2, Stage-13 - Stage-12 depends on stages: Stage-16 - Stage-0 depends on stages: Stage-12 - Stage-7 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-1, Stage-8 + Stage-17 depends on stages: Stage-2, Stage-14 + Stage-13 depends on stages: Stage-17 + Stage-0 depends on stages: Stage-13 + Stage-7 depends on stages: Stage-0, Stage-8 + Stage-8 depends on stages: Stage-13 Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: 
TableScan @@ -576,10 +646,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-15 Conditional Operator - Stage: Stage-17 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -593,7 +663,7 @@ STAGE PLANS: 0 1 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -625,7 +695,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: sq_2:s1 @@ -687,6 +757,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -699,9 +789,14 @@ STAGE PLANS: name: default.src_5 Stage: Stage-5 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 - Stage: Stage-16 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: sq_1:a @@ -729,7 +824,7 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 _col0 (type: string), _col1 (type: string) - Stage: Stage-12 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -749,6 +844,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -763,7 +873,34 @@ STAGE PLANS: name: default.src_4 Stage: Stage-7 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Map Reduce @@ -804,7 +941,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -842,18 +979,19 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED -RUN: Stage-14:CONDITIONAL -RUN: Stage-17:MAPREDLOCAL -RUN: Stage-13:MAPRED -RUN: Stage-15:MAPREDLOCAL +RUN: Stage-11:MAPRED +RUN: Stage-15:CONDITIONAL +RUN: Stage-18:MAPREDLOCAL +RUN: Stage-14:MAPRED RUN: Stage-16:MAPREDLOCAL +RUN: Stage-17:MAPREDLOCAL RUN: Stage-4:MAPRED -RUN: Stage-12:MAPRED +RUN: Stage-13:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE -RUN: Stage-5:STATS -RUN: Stage-7:STATS +RUN: Stage-8:MAPRED +RUN: Stage-5:COLUMNSTATS +RUN: Stage-7:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 diff --git ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out index 1abe95d31e..5006e85b1b 100644 --- ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out +++ ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -103,7 +103,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: sourceIP, avgTimeOnSite, adRevenue Column Types: string, int, float @@ -126,7 +127,8 @@ STAGE PLANS: TableScan alias: uservisits_web_text_none Statistics: Num rows: 65 Data size: 7060 Basic stats: COMPLETE Column stats: NONE - GatherStats: false + Statistics Aggregation Key Prefix: default.uservisits_web_text_none/ + GatherStats: true Select Operator expressions: sourceip (type: string), adrevenue (type: float), avgtimeonsite (type: int) outputColumnNames: sourceip, adrevenue, avgtimeonsite @@ -162,6 +164,8 @@ STAGE PLANS: #### A masked pattern was here #### name default.uservisits_web_text_none numFiles 1 + numRows 0 + rawDataSize 0 serialization.ddl struct uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string 
skeyword, i32 avgtimeonsite} serialization.format | serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -181,6 +185,8 @@ STAGE PLANS: #### A masked pattern was here #### name default.uservisits_web_text_none numFiles 1 + numRows 0 + rawDataSize 0 serialization.ddl struct uservisits_web_text_none { string sourceip, string desturl, string visitdate, float adrevenue, string useragent, string ccode, string lcode, string skeyword, i32 avgtimeonsite} serialization.format | serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -221,7 +227,9 @@ STAGE PLANS: MultiFileSpray: false Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: + Stats Aggregation Key Prefix: default.uservisits_web_text_none/ Column Stats Desc: Columns: sourceIP, avgTimeOnSite, adRevenue Column Types: string, int, float @@ -231,10 +239,12 @@ STAGE PLANS: PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue PREHOOK: type: QUERY PREHOOK: Input: default@uservisits_web_text_none +PREHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue POSTHOOK: type: QUERY POSTHOOK: Input: default@uservisits_web_text_none +POSTHOOK: Output: default@uservisits_web_text_none #### A masked pattern was here #### PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE @@ -245,6 +255,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector sourceIP string 0 55 12.763636363636364 13 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none avgTimeOnSite PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -254,6 +265,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector avgTimeOnSite int 1 9 0 9 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none adRevenue PREHOOK: type: DESCTABLE PREHOOK: Input: default@uservisits_web_text_none @@ -263,6 +275,7 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector adRevenue float 13.099044799804688 492.98870849609375 0 55 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} PREHOOK: query: CREATE TEMPORARY TABLE empty_tab( a int, b double, @@ -338,7 +351,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 - Column Stats Work + Stats Work + Basic Stats Work: Column Stats Desc: Columns: a, b, c, d, e Column Types: int, double, string, boolean, binary @@ -347,10 +361,12 @@ STAGE PLANS: PREHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e PREHOOK: type: QUERY PREHOOK: Input: default@empty_tab +PREHOOK: Output: default@empty_tab #### A masked pattern was here #### 
POSTHOOK: query: analyze table empty_tab compute statistics for columns a,b,c,d,e POSTHOOK: type: QUERY POSTHOOK: Input: default@empty_tab +POSTHOOK: Output: default@empty_tab #### A masked pattern was here #### PREHOOK: query: desc formatted empty_tab a PREHOOK: type: DESCTABLE @@ -441,6 +457,7 @@ POSTHOOK: query: desc extended default.UserVisits_web_text_none sourceIP POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@uservisits_web_text_none sourceIP string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none sourceIP PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -468,14 +485,17 @@ POSTHOOK: Input: default@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector sourceIP string 0 55 12.763636363636364 13 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} PREHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword PREHOOK: type: QUERY PREHOOK: Input: test@uservisits_web_text_none #### A masked pattern was here #### +PREHOOK: Output: test@uservisits_web_text_none POSTHOOK: query: analyze table UserVisits_web_text_none compute statistics for columns sKeyword POSTHOOK: type: QUERY POSTHOOK: Input: test@uservisits_web_text_none #### A masked pattern was here #### +POSTHOOK: Output: test@uservisits_web_text_none PREHOOK: query: desc extended UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -483,6 +503,7 @@ POSTHOOK: query: desc extended UserVisits_web_text_none sKeyword POSTHOOK: type: DESCTABLE POSTHOOK: Input: test@uservisits_web_text_none sKeyword string from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"sKeyword\":\"true\"}} PREHOOK: query: desc formatted UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -492,6 +513,7 @@ POSTHOOK: Input: test@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector sKeyword string 0 54 7.872727272727273 19 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"sKeyword\":\"true\"}} PREHOOK: query: desc formatted test.UserVisits_web_text_none sKeyword PREHOOK: type: DESCTABLE PREHOOK: Input: test@uservisits_web_text_none @@ -501,3 +523,4 @@ POSTHOOK: Input: test@uservisits_web_text_none # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment bitVector sKeyword string 0 54 7.872727272727273 19 HL from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"sKeyword\":\"true\"}} diff --git ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out index 59437b80b4..a5813d8c7f 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out @@ -77,7 +77,7 @@ POSTHOOK: type: CREATETABLE_AS_SELECT Plan optimized by CBO. 
Stage-3 - Stats-Aggr Operator + Stats Work{} Stage-4 Create Table Operator: name:default.t @@ -120,21 +120,32 @@ POSTHOOK: query: explain analyze insert overwrite table t select key from src POSTHOOK: type: QUERY Plan optimized by CBO. +Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Stage-3 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.t"} Stage-2 Dependency Collection{} Stage-1 - Map 1 - File Output Operator [FS_2] - table:{"name:":"default.t"} - Select Operator [SEL_1] (rows=500/500 width=87) - Output:["_col0"] - TableScan [TS_0] (rows=500/500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Reducer 2 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=440) + Output:["_col0"],aggregations:["compute_stats(VALUE._col0, 'hll')"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.t"} + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500/500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["key"] + Please refer to the previous Select Operator [SEL_1] PREHOOK: query: select key from src limit 10 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out index 54d1ce3b8e..ff39e4c76e 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out @@ -629,6 +629,22 @@ Stage-0 Stage-1 Map 1 File Output Operator [FS_10] + Merge Join Operator [MERGEJOIN_15] (rows=500/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_14] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_13] (rows=242/242 width=95) + predicate:key is not null + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] Merge Join Operator [MERGEJOIN_15] (rows=253/480 width=190) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] <-Select Operator [SEL_5] (rows=230/242 width=14) @@ -643,6 +659,7 @@ Stage-0 predicate:key is not null TableScan [TS_0] (rows=242/242 width=190) default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value PREHOOK: type: QUERY @@ -671,6 +688,36 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] + Merge Join Operator [MERGEJOIN_27] (rows=1080/1166 width=95) + Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_25] (rows=500/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] 
(rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242/242 width=91) + Output:["_col1"] + Filter Operator [FIL_24] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] Select Operator [SEL_15] (rows=278/1166 width=3) Output:["_col0","_col1"] Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3) @@ -701,6 +748,7 @@ Stage-0 predicate:value is not null TableScan [TS_6] (rows=242/242 width=175) default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key PREHOOK: type: QUERY @@ -730,6 +778,22 @@ Stage-0 Stage-1 Map 1 File Output Operator [FS_10] + Merge Join Operator [MERGEJOIN_15] (rows=500/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_14] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab2,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_13] (rows=242/242 width=95) + predicate:key is not null + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] Merge Join Operator [MERGEJOIN_15] (rows=253/480 width=190) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] <-Select Operator [SEL_5] (rows=230/242 width=14) @@ -744,6 +808,7 @@ Stage-0 predicate:key is not null TableScan [TS_0] (rows=242/242 width=190) default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value PREHOOK: type: QUERY @@ -776,11 +841,40 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] + Merge Join Operator [MERGEJOIN_27] (rows=1080/1166 width=95) Merge Join Operator [MERGEJOIN_27] (rows=278/1166 width=3) Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_25] (rows=500/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab2,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242/242 width=91) + Output:["_col1"] + Filter Operator [FIL_24] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + 
default@tab2,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] Merge Join Operator [MERGEJOIN_25] (rows=253/480 width=3) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] <-Select Operator [SEL_5] (rows=230/242 width=3) @@ -804,6 +898,7 @@ Stage-0 predicate:value is not null TableScan [TS_6] (rows=242/242 width=175) default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key UNION ALL @@ -855,21 +950,49 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_22] + Merge Join Operator [MERGEJOIN_38] (rows=1892/1646 width=8) Merge Join Operator [MERGEJOIN_38] (rows=531/1646 width=3) Conds:Union 2._col0=RS_19._col0(Inner) <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 + Select Operator [SEL_17] (rows=500/500 width=4) + Output:["_col0"] + Filter Operator [FIL_35] (rows=500/500 width=4) + predicate:key is not null + TableScan [TS_15] (rows=500/500 width=4) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] Select Operator [SEL_17] (rows=475/500 width=3) Output:["_col0"] Filter Operator [FIL_35] (rows=475/500 width=3) predicate:key is not null TableScan [TS_15] (rows=500/500 width=3) default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] Reduce Output Operator [RS_18] PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_36] (rows=500/480 width=4) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_33] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_32] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_0] (rows=242/242 width=4) Merge Join Operator [MERGEJOIN_36] (rows=253/480 width=3) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"] <-Select Operator [SEL_5] (rows=230/242 width=3) @@ -883,15 +1006,24 @@ Stage-0 Filter Operator [FIL_32] (rows=230/242 width=3) predicate:key is not null TableScan [TS_0] (rows=242/242 width=3) Output:["key"] <-Map 6 [CONTAINS] Reduce Output Operator [RS_18] PartitionCols:_col0 + Select Operator [SEL_12] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_34] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_10] (rows=242/242 width=4) Select Operator [SEL_12] (rows=230/242 width=3) Output:["_col0"] Filter Operator [FIL_34] (rows=230/242 width=3) predicate:key is not null TableScan [TS_10] (rows=242/242 width=3) Output:["key"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value @@ -944,30 +1076,81 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_28] + Merge Join Operator [MERGEJOIN_50] (rows=3372/3768 width=8) Merge Join Operator [MERGEJOIN_50] (rows=558/3768 width=3) Conds:Union 3._col0=RS_25._col0(Inner) <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 + Select 
Operator [SEL_23] (rows=500/500 width=4) + Output:["_col0"] + Filter Operator [FIL_46] (rows=500/500 width=4) + predicate:key is not null + TableScan [TS_21] (rows=500/500 width=4) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] Select Operator [SEL_23] (rows=475/500 width=3) Output:["_col0"] Filter Operator [FIL_46] (rows=475/500 width=3) predicate:key is not null TableScan [TS_21] (rows=500/500 width=3) default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"] <-Union 3 [SIMPLE_EDGE] <-Map 8 [CONTAINS] Reduce Output Operator [RS_24] PartitionCols:_col0 + Select Operator [SEL_18] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_45] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_16] (rows=242/242 width=4) Select Operator [SEL_18] (rows=230/242 width=3) Output:["_col0"] Filter Operator [FIL_45] (rows=230/242 width=3) predicate:key is not null TableScan [TS_16] (rows=242/242 width=3) Output:["key"] <-Reducer 2 [CONTAINS] Reduce Output Operator [RS_24] PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_49] (rows=1080/1166 width=4) + Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_47] (rows=500/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_43] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_42] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col1 + Select Operator [SEL_8] (rows=242/242 width=91) + Output:["_col1"] + Filter Operator [FIL_44] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] Select Operator [SEL_15] (rows=278/1166 width=3) Output:["_col0"] Merge Join Operator [MERGEJOIN_49] (rows=278/1166 width=3) @@ -998,6 +1181,7 @@ Stage-0 predicate:value is not null TableScan [TS_6] (rows=242/242 width=175) default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] PREHOOK: query: CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -1111,29 +1295,48 @@ Plan optimized by CBO. 
Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 13 <- Union 14 (CONTAINS) -Map 15 <- Union 14 (CONTAINS) -Map 16 <- Union 14 (CONTAINS) -Map 19 <- Union 20 (CONTAINS) -Map 21 <- Union 20 (CONTAINS) -Map 22 <- Union 20 (CONTAINS) -Map 23 <- Union 20 (CONTAINS) -Map 6 <- Union 2 (CONTAINS) -Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 14 (SIMPLE_EDGE) -Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 8 <- Map 18 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE), Union 20 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 16 <- Union 17 (CONTAINS) +Map 18 <- Union 17 (CONTAINS) +Map 19 <- Union 17 (CONTAINS) +Map 22 <- Union 23 (CONTAINS) +Map 24 <- Union 23 (CONTAINS) +Map 25 <- Union 23 (CONTAINS) +Map 26 <- Union 23 (CONTAINS) +Map 9 <- Union 2 (CONTAINS) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 23 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 14 <- Map 13 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) +Reducer 15 <- Map 20 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 5 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Union 5 (CUSTOM_SIMPLE_EDGE) Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.a"} Stage-4 Dependency Collection{} Stage-3 + Reducer 6 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Union 5 [CUSTOM_SIMPLE_EDGE] + <-Reducer 12 [CONTAINS] + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_72] (rows=313/820 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_124] (rows=313/820 width=175) + Conds:RS_69._col1=Union 23._col0(Inner),Output:["_col0","_col3"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_69] Union 5 <-Reducer 12 [CONTAINS] File Output Operator [FS_75] @@ -1158,41 +1361,51 @@ Stage-5 Conds:Union 14._col0=RS_39._col1(Inner),Output:["_col1"] <-Map 10 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col1 - Select Operator [SEL_34] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_111] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - TableScan [TS_11] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] - Reduce Output Operator [RS_38] - PartitionCols:_col0 - Select Operator [SEL_23] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_108] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_21] (rows=25/25 width=89) - Output:["value"] - <-Map 15 [CONTAINS] - Reduce Output Operator [RS_38] + Merge Join Operator [MERGEJOIN_123] (rows=44/115 width=264) + Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Select Operator [SEL_10] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_106] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_8] (rows=25/25 
width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_114] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_49] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 23 [SIMPLE_EDGE] + <-Map 22 [CONTAINS] + Reduce Output Operator [RS_70] PartitionCols:_col0 - Select Operator [SEL_26] (rows=500/500 width=91) + Select Operator [SEL_54] (rows=25/25 width=89) Output:["_col0"] - Filter Operator [FIL_109] (rows=500/500 width=91) + Filter Operator [FIL_115] (rows=25/25 width=89) predicate:value is not null - TableScan [TS_24] (rows=500/500 width=91) + TableScan [TS_52] (rows=25/25 width=89) Output:["value"] - <-Map 16 [CONTAINS] - Reduce Output Operator [RS_38] + <-Map 24 [CONTAINS] + Reduce Output Operator [RS_70] PartitionCols:_col0 - Select Operator [SEL_30] (rows=500/500 width=91) + Select Operator [SEL_57] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_110] (rows=500/500 width=91) + Filter Operator [FIL_116] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_28] (rows=500/500 width=91) + TableScan [TS_55] (rows=500/500 width=91) Output:["value"] + <-Map 25 [CONTAINS] + Reduce Output Operator [RS_70] File Output Operator [FS_77] table:{"name:":"default.b"} Please refer to the previous Select Operator [SEL_44] @@ -1231,22 +1444,52 @@ <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] Reduce Output Operator [RS_14] PartitionCols:_col0 - Select Operator [SEL_2] (rows=25/25 width=89) + Select Operator [SEL_61] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_104] (rows=25/25 width=89) + Filter Operator [FIL_117] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_0] (rows=25/25 width=89) + TableScan [TS_59] (rows=500/500 width=91) Output:["value"] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_14] + <-Map 26 [CONTAINS] + Reduce Output Operator [RS_70] PartitionCols:_col0 - Select Operator [SEL_5] (rows=500/500 width=91) + Select Operator [SEL_64] (rows=500/500 width=91) Output:["_col0"] - Filter Operator [FIL_105] (rows=500/500 width=91) + Filter Operator [FIL_118] (rows=500/500 width=91) predicate:value is not null - TableScan [TS_3] (rows=500/500 width=91) + TableScan [TS_62] (rows=500/500 width=91) Output:["value"] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_72] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_72] + <-Reducer 15 [CONTAINS] + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_44] (rows=5839/5421 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_122] (rows=5839/5421 width=178) + Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col1","_col4"] 
+ <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_42] File Output Operator [FS_77] table:{"name:":"default.b"} Please refer to the previous Select Operator [SEL_20] @@ -1267,68 +1510,153 @@ Conds:RS_66._col0=RS_67._col0(Inner),Output:["_col0","_col1","_col3"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_10] - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_67] + Select Operator [SEL_37] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_112] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_35] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_121] (rows=2394/2097 width=87) + Conds:Union 17._col0=RS_39._col1(Inner),Output:["_col1"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col1 + Select Operator [SEL_34] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_111] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_11] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 17 [SIMPLE_EDGE] + <-Map 16 [CONTAINS] + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_108] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_21] (rows=25/25 width=89) + Output:["value"] + <-Map 18 [CONTAINS] + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_109] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_24] (rows=500/500 width=91) + Output:["value"] + <-Map 19 [CONTAINS] + Reduce Output Operator [RS_38] + PartitionCols:_col0 + Select Operator [SEL_30] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_110] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_28] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_44] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_44] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_44] + <-Reducer 4 [CONTAINS] + File Output Operator [FS_75] + table:{"name:":"default.a"} + Select Operator [SEL_20] (rows=148/170 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_120] (rows=148/170 width=177) + Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col1","_col4"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_18] PartitionCols:_col0 - Select Operator [SEL_51] (rows=25/25 width=175) + Select Operator [SEL_13] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_114] (rows=25/25 width=175) + Filter Operator [FIL_107] (rows=500/500 width=178) predicate:key is not null - TableScan [TS_49] (rows=25/25 width=175) - 
default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 20 [SIMPLE_EDGE] - <-Map 19 [CONTAINS] - Reduce Output Operator [RS_70] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_115] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_52] (rows=25/25 width=89) - Output:["value"] - <-Map 21 [CONTAINS] - Reduce Output Operator [RS_70] - PartitionCols:_col0 - Select Operator [SEL_57] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_116] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_55] (rows=500/500 width=91) - Output:["value"] - <-Map 22 [CONTAINS] - Reduce Output Operator [RS_70] - PartitionCols:_col0 - Select Operator [SEL_61] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_117] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_59] (rows=500/500 width=91) - Output:["value"] - <-Map 23 [CONTAINS] - Reduce Output Operator [RS_70] - PartitionCols:_col0 - Select Operator [SEL_64] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_118] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_62] (rows=500/500 width=91) - Output:["value"] - File Output Operator [FS_77] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_72] - File Output Operator [FS_79] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_72] + Please refer to the previous TableScan [TS_11] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_119] (rows=61/108 width=86) + Conds:Union 2._col0=RS_15._col1(Inner),Output:["_col1"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_10] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_104] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_0] (rows=25/25 width=89) + Output:["value"] + <-Map 9 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_105] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_3] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_77] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_79] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <- Please refer to the previous Union 5 [CUSTOM_SIMPLE_EDGE] + Reducer 8 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + 
Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <- Please refer to the previous Union 5 [CUSTOM_SIMPLE_EDGE] Stage-6 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.b"} Please refer to the previous Stage-4 Stage-7 - Stats-Aggr Operator + Stats Work{} Stage-2 Move Operator table:{"name:":"default.c"} @@ -1410,37 +1738,174 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 10 <- Union 2 (CONTAINS) -Map 17 <- Union 18 (CONTAINS) -Map 22 <- Union 18 (CONTAINS) -Map 23 <- Union 20 (CONTAINS) -Map 26 <- Union 27 (CONTAINS) -Map 33 <- Union 27 (CONTAINS) -Map 34 <- Union 29 (CONTAINS) -Map 35 <- Union 31 (CONTAINS) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE), Union 8 (CONTAINS) -Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 16 <- Map 24 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 19 <- Union 18 (SIMPLE_EDGE), Union 20 (CONTAINS) -Reducer 21 <- Union 20 (SIMPLE_EDGE) -Reducer 28 <- Union 27 (SIMPLE_EDGE), Union 29 (CONTAINS) +Map 13 <- Union 2 (CONTAINS) +Map 20 <- Union 21 (CONTAINS) +Map 25 <- Union 21 (CONTAINS) +Map 26 <- Union 23 (CONTAINS) +Map 29 <- Union 30 (CONTAINS) +Map 36 <- Union 30 (CONTAINS) +Map 37 <- Union 32 (CONTAINS) +Map 38 <- Union 34 (CONTAINS) +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE), Union 8 (CONTAINS) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 19 <- Map 27 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 22 <- Union 21 (SIMPLE_EDGE), Union 23 (CONTAINS) +Reducer 24 <- Union 23 (SIMPLE_EDGE) Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) -Reducer 32 <- Union 31 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 31 <- Union 30 (SIMPLE_EDGE), Union 32 (CONTAINS) +Reducer 33 <- Union 32 (SIMPLE_EDGE), Union 34 (CONTAINS) +Reducer 35 <- Union 34 (SIMPLE_EDGE) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 17 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) Reducer 9 <- Union 8 (SIMPLE_EDGE) Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.a"} Stage-4 Dependency Collection{} Stage-3 + Reducer 10 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=6300/319 width=178) + Output:["key","value"] + Group By Operator [GBY_112] (rows=6300/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 8 [SIMPLE_EDGE] + <-Reducer 16 [CONTAINS] + Reduce Output Operator [RS_111] + PartitionCols:_col0, _col1 + Select Operator [SEL_107] (rows=313/304 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_164] (rows=313/304 width=175) + Conds:RS_104._col1=RS_105._col1(Inner),Output:["_col0","_col3"] +
<-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_163] (rows=44/115 width=264) + Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_146] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_12] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 28 [SIMPLE_EDGE] + SHUFFLE [RS_102] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_154] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_68] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 35 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col1 + Select Operator [SEL_100] (rows=1525/319 width=178) + Output:["_col1"] + Group By Operator [GBY_99] (rows=1525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 34 [SIMPLE_EDGE] + <-Map 38 [CONTAINS] + Reduce Output Operator [RS_98] + PartitionCols:_col1, _col0 + Select Operator [SEL_94] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_158] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_92] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 33 [CONTAINS] + Reduce Output Operator [RS_98] + PartitionCols:_col1, _col0 + Select Operator [SEL_91] (rows=1025/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_90] (rows=1025/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 32 [SIMPLE_EDGE] + <-Map 37 [CONTAINS] + Reduce Output Operator [RS_89] + PartitionCols:_col1, _col0 + Select Operator [SEL_85] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_83] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 31 [CONTAINS] + Reduce Output Operator [RS_89] + PartitionCols:_col1, _col0 + Select Operator [SEL_82] (rows=525/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_81] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 30 [SIMPLE_EDGE] + <-Map 29 [CONTAINS] + Reduce Output Operator [RS_80] + PartitionCols:_col1, _col0 + Select Operator [SEL_73] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_71] (rows=25/25 width=175) + Output:["key","value"] + <-Map 36 [CONTAINS] + Reduce Output Operator [RS_80] + PartitionCols:_col1, _col0 + Select Operator [SEL_76] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_74] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 7 [CONTAINS] + Reduce Output Operator [RS_111] + PartitionCols:_col0, _col1 + Group By Operator [GBY_63] (rows=5987/309 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 6 [SIMPLE_EDGE] + <-Reducer 19 [CONTAINS] + Reduce Output Operator [RS_62] + PartitionCols:_col0, _col1 + Select Operator [SEL_58] (rows=5839/1056 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_162] (rows=5839/1056 width=178) + Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col2","_col5"] + 
<-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_152] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_49] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_161] (rows=2394/512 width=87) + Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=500/500 width=178) Reducer 9 File Output Operator [FS_115] table:{"name:":"default.a"} @@ -1515,28 +1980,125 @@ Stage-5 Reduce Output Operator [RS_89] PartitionCols:_col1, _col0 Select Operator [SEL_82] (rows=525/319 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_81] (rows=525/319 width=178) + Filter Operator [FIL_151] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_15] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + Select Operator [SEL_45] (rows=1025/319 width=178) + Output:["_col1"] + Group By Operator [GBY_44] (rows=1025/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 27 [SIMPLE_EDGE] + <-Union 23 [SIMPLE_EDGE] <-Map 26 [CONTAINS] - Reduce Output Operator [RS_80] + Reduce Output Operator [RS_43] + PartitionCols:_col1, _col0 + Select Operator [SEL_39] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_150] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_37] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 22 [CONTAINS] + Reduce Output Operator [RS_43] + PartitionCols:_col1, _col0 + Select Operator [SEL_36] (rows=525/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_35] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 21 [SIMPLE_EDGE] + <-Map 20 [CONTAINS] + Reduce Output Operator [RS_34] + PartitionCols:_col1, _col0 + Select Operator [SEL_27] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_148] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_25] (rows=25/25 width=175) + Output:["key","value"] + <-Map 25 [CONTAINS] + Reduce Output Operator [RS_34] + PartitionCols:_col1, _col0 + Select Operator [SEL_30] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_149] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_28] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 5 [CONTAINS] + Reduce Output Operator [RS_62] + PartitionCols:_col0, _col1 + Select Operator [SEL_24] (rows=148/61 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_160] (rows=148/61 width=177) + Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col2","_col5"] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_147] (rows=500/500 width=178) + predicate:key is not null + Please refer to the previous TableScan [TS_15] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_159] (rows=61/52 width=86) + Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_19] +
PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_14] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Select Operator [SEL_11] (rows=525/319 width=178) + Output:["_col1"] + Group By Operator [GBY_10] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_9] PartitionCols:_col1, _col0 - Select Operator [SEL_73] (rows=25/25 width=175) + Select Operator [SEL_2] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=25/25 width=175) + Filter Operator [FIL_144] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_71] (rows=25/25 width=175) + TableScan [TS_0] (rows=25/25 width=175) Output:["key","value"] + <-Map 13 [CONTAINS] + Reduce Output Operator [RS_9] PartitionCols:_col1, _col0 - Select Operator [SEL_76] (rows=500/500 width=178) + Select Operator [SEL_5] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=500/500 width=178) + Filter Operator [FIL_145] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_74] (rows=500/500 width=178) + TableScan [TS_3] (rows=500/500 width=178) Output:["key","value"] + Reducer 11 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=6300/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] + Reducer 12 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=6300/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] <-Reducer 7 [CONTAINS] Reduce Output Operator [RS_111] PartitionCols:_col0, _col1 @@ -1672,14 +2234,15 @@ Stage-5 File Output Operator [FS_119] table:{"name:":"default.c"} Please refer to the previous Group By Operator [GBY_112] Stage-6 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.b"} Please refer to the previous Stage-4 Stage-7 - Stats-Aggr Operator + Stats Work{} Stage-2 Move Operator table:{"name:":"default.c"} @@ -1736,19 +2299,71 @@ POSTHOOK: type: QUERY Plan optimized by CBO.
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Stage-4 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.dest1"} Stage-3 Dependency Collection{} Stage-2 + Reducer 6 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_18] + table:{"name:":"default.dest1"} + Select Operator [SEL_16] (rows=205/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_15] (rows=205/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_11] (rows=501/310 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_10] + PartitionCols:_col0, _col1 + Select Operator [SEL_6] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_10] + PartitionCols:_col0, _col1 + Select Operator [SEL_4] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_3] (rows=1/2 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=205/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=501/310 width=456) + Output:["key","val1","val2"] Reducer 5 File Output Operator [FS_18] table:{"name:":"default.dest1"} @@ -1783,13 +2398,14 @@ Stage-4 default@src,s1,Tbl:COMPLETE,Col:COMPLETE File Output Operator [FS_24] table:{"name:":"default.dest2"} Select Operator [SEL_22] (rows=501/310 width=456) Output:["_col0","_col1","_col2"] Group By Operator [GBY_21] (rows=501/310 width=280) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 Please refer to the previous Group By Operator [GBY_11] Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.dest2"} @@ -1967,20 +2583,24 @@ POSTHOOK: type: QUERY Plan optimized by CBO.
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) -Map 7 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-4 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.dest1"} Stage-3 Dependency Collection{} Stage-2 Reducer 4 File Output Operator [FS_16] table:{"name:":"default.dest1"} @@ -2024,16 +2644,76 @@ Stage-4 Reduce Output Operator [RS_18] PartitionCols:_col0, _col1 Please refer to the previous Select Operator [SEL_4] Reducer 5 - File Output Operator [FS_22] - table:{"name:":"default.dest2"} - Select Operator [SEL_20] (rows=1001/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_19] (rows=1001/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_16] + table:{"name:":"default.dest1"} + Select Operator [SEL_14] (rows=205/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_13] (rows=205/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Select Operator [SEL_6] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_18] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_6] + <-Map 9 [CONTAINS] + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_8] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_18] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_9] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_12] + PartitionCols:_col0 + Select Operator [SEL_4] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_3] (rows=1/2 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_18] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_4] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=205/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_14] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_22] + table:{"name:":"default.dest2"} + Select Operator [SEL_20] (rows=1001/310 width=456) +
Output:["_col0","_col1","_col2"] + Group By Operator [GBY_19] (rows=1001/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=1001/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_20] Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.dest2"} @@ -2086,19 +2766,23 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) Stage-4 - Stats-Aggr Operator + Stats Work{} Stage-0 Move Operator table:{"name:":"default.dest1"} Stage-3 Dependency Collection{} Stage-2 Reducer 4 File Output Operator [FS_14] table:{"name:":"default.dest1"} @@ -2132,16 +2816,66 @@ Stage-4 Reduce Output Operator [RS_16] PartitionCols:_col0, _col1 Please refer to the previous Select Operator [SEL_4] Reducer 5 - File Output Operator [FS_20] - table:{"name:":"default.dest2"} - Select Operator [SEL_18] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_17] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_14] + table:{"name:":"default.dest1"} + Select Operator [SEL_12] (rows=205/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_11] (rows=205/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_10] + PartitionCols:_col0 + Select Operator [SEL_6] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_5] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_6] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_10] + PartitionCols:_col0 + Select Operator [SEL_4] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_3] (rows=1/2 width=8) + Output:["_col0"],aggregations:["count()"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_16] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_4] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=205/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_12] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/2 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 
16)","compute_stats(VALUE._col3, 16)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_20] + table:{"name:":"default.dest2"} + Select Operator [SEL_18] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_17] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=501/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_18] Stage-5 - Stats-Aggr Operator + Stats Work{} Stage-1 Move Operator table:{"name:":"default.dest2"} diff --git ql/src/test/results/clientpositive/truncate_column.q.out ql/src/test/results/clientpositive/truncate_column.q.out index cc3bc89cfe..7ff963cdc2 100644 --- ql/src/test/results/clientpositive/truncate_column.q.out +++ ql/src/test/results/clientpositive/truncate_column.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 @@ -297,7 +297,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 1 numRows 10 @@ -495,7 +495,7 @@ Database: default Table: test_tab_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 diff --git ql/src/test/results/clientpositive/tunable_ndv.q.out ql/src/test/results/clientpositive/tunable_ndv.q.out index a7839ab70e..5406b6b828 100644 --- ql/src/test/results/clientpositive/tunable_ndv.q.out +++ ql/src/test/results/clientpositive/tunable_ndv.q.out @@ -60,12 +60,18 @@ PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_1d PREHOOK: Input: default@loc_orc_1d@year=2000 PREHOOK: Input: default@loc_orc_1d@year=2001 +PREHOOK: Output: default@loc_orc_1d +PREHOOK: Output: default@loc_orc_1d@year=2000 +PREHOOK: Output: default@loc_orc_1d@year=2001 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_1d compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_1d POSTHOOK: Input: default@loc_orc_1d@year=2000 POSTHOOK: Input: default@loc_orc_1d@year=2001 +POSTHOOK: Output: default@loc_orc_1d +POSTHOOK: Output: default@loc_orc_1d@year=2000 +POSTHOOK: Output: default@loc_orc_1d@year=2001 #### A masked pattern was here #### PREHOOK: query: describe formatted loc_orc_1d partition(year=2000) locid PREHOOK: type: DESCTABLE @@ -152,41 +158,57 @@ PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2000') compu PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2000 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94086/year=2000 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2000') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: 
default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2000 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2000 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2000') compute statistics for columns state,locid PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2000 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94087/year=2000 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2000') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2000 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2000 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94086/year=2001 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94086, year='2001') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94086/year=2001 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94086/year=2001 #### A masked pattern was here #### PREHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2001') compute statistics for columns state,locid PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc_2d PREHOOK: Input: default@loc_orc_2d@zip=94087/year=2001 +PREHOOK: Output: default@loc_orc_2d +PREHOOK: Output: default@loc_orc_2d@zip=94087/year=2001 #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc_2d partition(zip=94087, year='2001') compute statistics for columns state,locid POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc_2d POSTHOOK: Input: default@loc_orc_2d@zip=94087/year=2001 +POSTHOOK: Output: default@loc_orc_2d +POSTHOOK: Output: default@loc_orc_2d@zip=94087/year=2001 #### A masked pattern was here #### PREHOOK: query: describe formatted loc_orc_2d locid PREHOOK: type: DESCTABLE diff --git ql/src/test/results/clientpositive/udf1.q.out ql/src/test/results/clientpositive/udf1.q.out index eebd90f891..27dbfa47d9 100644 --- ql/src/test/results/clientpositive/udf1.q.out +++ ql/src/test/results/clientpositive/udf1.q.out @@ -64,6 +64,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: 
NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll'), compute_stats(c12, 'hll'), compute_stats(c13, 'hll'), compute_stats(c14, 'hll'), compute_stats(c15, 'hll'), compute_stats(c16, 'hll'), compute_stats(c17, 'hll'), compute_stats(c18, 'hll'), compute_stats(c19, 'hll'), compute_stats(c20, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: struct), _col16 (type: struct), _col17 (type: struct), _col18 (type: struct), _col19 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8), compute_stats(VALUE._col9), compute_stats(VALUE._col10), compute_stats(VALUE._col11), compute_stats(VALUE._col12), compute_stats(VALUE._col13), compute_stats(VALUE._col14), compute_stats(VALUE._col15), compute_stats(VALUE._col16), compute_stats(VALUE._col17), compute_stats(VALUE._col18), compute_stats(VALUE._col19) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8800 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -85,7 +111,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 + Column Types: string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/udf3.q.out ql/src/test/results/clientpositive/udf3.q.out index 96038f12af..9f9f56fe02 100644 --- ql/src/test/results/clientpositive/udf3.q.out +++ ql/src/test/results/clientpositive/udf3.q.out @@ -55,6 +55,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: c1, c2, c3, c4, c5 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll') + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,7 +87,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, c2, c3, c4, c5 + Column Types: string, string, string, string, string + Table: default.dest1 PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(CAST('' AS INT)), sum(CAST('' AS INT)), avg(CAST('' AS INT)), min(CAST('' AS INT)), max(CAST('' AS INT)) diff --git ql/src/test/results/clientpositive/udf_10_trims.q.out ql/src/test/results/clientpositive/udf_10_trims.q.out index 3a5303adfe..4f08014ee2 100644 --- ql/src/test/results/clientpositive/udf_10_trims.q.out +++ ql/src/test/results/clientpositive/udf_10_trims.q.out @@ -50,6 +50,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -71,7 +97,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git 
ql/src/test/results/clientpositive/udf_character_length.q.out ql/src/test/results/clientpositive/udf_character_length.q.out index 332ec95644..c960f20c91 100644 --- ql/src/test/results/clientpositive/udf_character_length.q.out +++ ql/src/test/results/clientpositive/udf_character_length.q.out @@ -71,6 +71,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -92,7 +118,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/udf_length.q.out ql/src/test/results/clientpositive/udf_length.q.out index fc795bbcf4..0c7d952a0c 100644 --- ql/src/test/results/clientpositive/udf_length.q.out +++ ql/src/test/results/clientpositive/udf_length.q.out @@ -54,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -75,7 +101,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 Stage: Stage-3 Map 
Reduce diff --git ql/src/test/results/clientpositive/udf_octet_length.q.out ql/src/test/results/clientpositive/udf_octet_length.q.out index f8738f813c..3d03614f14 100644 --- ql/src/test/results/clientpositive/udf_octet_length.q.out +++ ql/src/test/results/clientpositive/udf_octet_length.q.out @@ -54,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -75,7 +101,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/udf_reverse.q.out ql/src/test/results/clientpositive/udf_reverse.q.out index 28b0c9f197..abe326c9d6 100644 --- ql/src/test/results/clientpositive/udf_reverse.q.out +++ ql/src/test/results/clientpositive/udf_reverse.q.out @@ -54,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -75,7 +101,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: len + Column Types: string + Table: default.dest1 Stage: 
Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/union10.q.out ql/src/test/results/clientpositive/union10.q.out index e14e5e0dd2..0bb9ccd1e5 100644 --- ql/src/test/results/clientpositive/union10.q.out +++ ql/src/test/results/clientpositive/union10.q.out @@ -88,6 +88,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -103,6 +116,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -118,6 +144,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -139,7 +191,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - 
Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/union12.q.out ql/src/test/results/clientpositive/union12.q.out index 10540f9c49..93feadf454 100644 --- ql/src/test/results/clientpositive/union12.q.out +++ ql/src/test/results/clientpositive/union12.q.out @@ -88,6 +88,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -103,6 +116,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -118,6 +144,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -139,7 +191,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/union17.q.out ql/src/test/results/clientpositive/union17.q.out index bff29f6f4e..828ab0b93f 100644 --- ql/src/test/results/clientpositive/union17.q.out +++ ql/src/test/results/clientpositive/union17.q.out @@ -32,10 +32,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-8 Stage-5 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1, Stage-5, Stage-8 + Stage-6 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -152,6 +154,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -164,13 +181,48 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -194,6 +246,21 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -205,8 +272,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 - Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/union18.q.out ql/src/test/results/clientpositive/union18.q.out index 702ff103f9..1e7473ec27 100644 --- ql/src/test/results/clientpositive/union18.q.out +++ ql/src/test/results/clientpositive/union18.q.out @@ -34,17 +34,13 @@ STAGE DEPENDENCIES: Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-11 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 - Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 + Stage-1 depends on stages: Stage-3 + Stage-10 depends on stages: Stage-1, Stage-11 + Stage-11 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -96,6 +92,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + 
Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -107,6 +116,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -125,6 +149,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -136,6 +173,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: 
PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -157,7 +222,12 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 Stage: Stage-5 Map Reduce @@ -189,15 +259,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -209,37 +270,34 @@ STAGE PLANS: name: default.dest2 Stage: Stage-10 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/union19.q.out ql/src/test/results/clientpositive/union19.q.out index 35530912ff..fa0e7dfb8d 100644 --- ql/src/test/results/clientpositive/union19.q.out +++ ql/src/test/results/clientpositive/union19.q.out @@ -32,9 +32,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ 
-101,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -134,6 +151,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -165,7 +212,42 @@ STAGE PLANS: name: default.dest1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 Stage: Stage-1 Move Operator @@ -177,8 +259,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git ql/src/test/results/clientpositive/union22.q.out ql/src/test/results/clientpositive/union22.q.out index 9134bdf19f..ce4fa62359 100644 --- ql/src/test/results/clientpositive/union22.q.out +++ ql/src/test/results/clientpositive/union22.q.out @@ -97,7 +97,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -225,7 +225,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -272,7 +272,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -361,6 +361,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 
348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -395,6 +414,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 'hll'), compute_stats(k2, 'hll'), compute_stats(k3, 'hll'), compute_stats(k4, 'hll') + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -428,7 +466,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -470,6 +508,40 @@ STAGE PLANS: Truncated Path -> Alias: /dst_union22_delta/ds=1 [null-subquery1:$hdt$_0-subquery1:dst_union22_delta] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + 
GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -499,8 +571,14 @@ STAGE PLANS: name: default.dst_union22 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### + Column Stats Desc: + Columns: k1, k2, k3, k4 + Column Types: string, string, string, string + Table: default.dst_union22 + Is Table Level Stats: false Stage: Stage-4 Map Reduce @@ -558,7 +636,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -605,7 +683,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 diff --git ql/src/test/results/clientpositive/union25.q.out ql/src/test/results/clientpositive/union25.q.out index a287a97967..9eac674e7d 100644 --- ql/src/test/results/clientpositive/union25.q.out +++ ql/src/test/results/clientpositive/union25.q.out @@ -197,5 +197,6 @@ STAGE PLANS: name: default.tmp_unionall Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: diff --git ql/src/test/results/clientpositive/union28.q.out ql/src/test/results/clientpositive/union28.q.out index c3789d08e9..f6b4b1c95c 100644 --- ql/src/test/results/clientpositive/union28.q.out +++ ql/src/test/results/clientpositive/union28.q.out @@ -102,6 +102,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -117,6 +130,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +158,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -153,7 +205,12 @@ STAGE PLANS: name: default.union_subq_union Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/union29.q.out ql/src/test/results/clientpositive/union29.q.out index 87ba275b87..0eea64f3d6 100644 --- ql/src/test/results/clientpositive/union29.q.out +++ ql/src/test/results/clientpositive/union29.q.out @@ -67,6 +67,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -88,6 +101,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -109,6 +135,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -130,7 +182,12 @@ STAGE PLANS: name: default.union_subq_union Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union Stage: Stage-3 Map Reduce diff --git ql/src/test/results/clientpositive/union30.q.out ql/src/test/results/clientpositive/union30.q.out index 862ebc11d3..76b68a6e37 100644 --- ql/src/test/results/clientpositive/union30.q.out +++ ql/src/test/results/clientpositive/union30.q.out @@ -116,6 +116,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -137,6 +150,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE @@ -152,6 +178,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE @@ -167,6 +206,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -188,7 +253,12 @@ STAGE PLANS: name: default.union_subq_union Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/union31.q.out ql/src/test/results/clientpositive/union31.q.out index b7a63fc6f0..6de0b40f29 100644 --- ql/src/test/results/clientpositive/union31.q.out +++ ql/src/test/results/clientpositive/union31.q.out @@ -71,10 +71,12 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-4, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-4, Stage-7 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-7 depends on stages: 
Stage-5 STAGE PLANS: Stage: Stage-2 @@ -181,6 +183,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -193,13 +210,48 @@ STAGE PLANS: name: default.t3 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.t3 Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: value, cnt + Column Types: string, int + Table: default.t4 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -224,6 +276,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -235,8 +302,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + 
Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from t1 @@ -340,12 +426,14 @@ insert overwrite table t6 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2, Stage-6 + Stage-3 depends on stages: Stage-2, Stage-8 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 - Stage-6 is a root stage + Stage-7 depends on stages: Stage-3 + Stage-8 is a root stage STAGE PLANS: Stage: Stage-2 @@ -426,6 +514,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -444,6 +547,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -456,7 +574,42 @@ STAGE PLANS: name: default.t5 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t5 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t6 Stage: Stage-1 Move Operator @@ -468,10 +621,29 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -653,9 +825,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-5, Stage-7 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-1, Stage-5, Stage-7 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -752,6 +926,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -770,6 +959,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 'hll'), 
compute_stats(cnt, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -782,7 +986,42 @@ STAGE PLANS: name: default.t7 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t7 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t8 Stage: Stage-1 Move Operator @@ -794,8 +1033,27 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 - Stage: Stage-5 - Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from ( diff --git ql/src/test/results/clientpositive/union33.q.out ql/src/test/results/clientpositive/union33.q.out index 17aeecd254..5b74d41338 100644 --- ql/src/test/results/clientpositive/union33.q.out +++ ql/src/test/results/clientpositive/union33.q.out @@ -124,6 +124,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -135,6 +148,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -156,7 +195,12 @@ STAGE PLANS: name: default.test_src Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src Stage: Stage-4 Map Reduce @@ -330,6 +374,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -350,6 +407,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: 
struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -371,7 +454,12 @@ STAGE PLANS: name: default.test_src Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src Stage: Stage-5 Map Reduce diff --git ql/src/test/results/clientpositive/union4.q.out ql/src/test/results/clientpositive/union4.q.out index f3cd59c42e..948b2b0d4b 100644 --- ql/src/test/results/clientpositive/union4.q.out +++ ql/src/test/results/clientpositive/union4.q.out @@ -83,6 +83,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE @@ -98,6 +111,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -119,7 +158,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic 
Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/union6.q.out ql/src/test/results/clientpositive/union6.q.out index fc66cf1254..ddfd54fd2e 100644 --- ql/src/test/results/clientpositive/union6.q.out +++ ql/src/test/results/clientpositive/union6.q.out @@ -78,6 +78,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -95,6 +108,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -116,7 +155,12 @@ STAGE PLANS: name: default.tmptable Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable Stage: Stage-4 Map Reduce diff --git ql/src/test/results/clientpositive/union_lateralview.q.out ql/src/test/results/clientpositive/union_lateralview.q.out index f563476de0..6cd7ac4860 100644 --- ql/src/test/results/clientpositive/union_lateralview.q.out +++ ql/src/test/results/clientpositive/union_lateralview.q.out @@ -47,7 +47,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -178,6 +179,21 @@ STAGE PLANS: output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_union_lateral_view + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, arr_ele, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(arr_ele, 'hll'), compute_stats(value, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -190,7 +206,34 @@ STAGE PLANS: name: default.test_union_lateral_view Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: key, arr_ele, value + Column Types: int, int, string + Table: default.test_union_lateral_view + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1288 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view SELECT b.key, d.arr_ele, d.value diff --git ql/src/test/results/clientpositive/union_stats.q.out ql/src/test/results/clientpositive/union_stats.q.out index 9ea0b519c2..30abb6813a 100644 --- ql/src/test/results/clientpositive/union_stats.q.out +++ ql/src/test/results/clientpositive/union_stats.q.out @@ -156,7 +156,8 @@ STAGE PLANS: name: default.t Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: #### A masked pattern was here #### Stage: Stage-3 @@ -487,7 +488,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1000 rawDataSize 10624 @@ -522,7 +523,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1000 rawDataSize 10624 diff --git ql/src/test/results/clientpositive/updateAccessTime.q.out ql/src/test/results/clientpositive/updateAccessTime.q.out index 2dcd930b8e..54410ad233 100644 --- ql/src/test/results/clientpositive/updateAccessTime.q.out +++ ql/src/test/results/clientpositive/updateAccessTime.q.out @@ -226,8 +226,10 
@@ POSTHOOK: Output: default@src PREHOOK: query: ANALYZE TABLE src COMPUTE STATISTICS FOR COLUMNS key,value PREHOOK: type: QUERY PREHOOK: Input: default@src +PREHOOK: Output: default@src #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE src COMPUTE STATISTICS FOR COLUMNS key,value POSTHOOK: type: QUERY POSTHOOK: Input: default@src +POSTHOOK: Output: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/vector_bucket.q.out ql/src/test/results/clientpositive/vector_bucket.q.out index 3b74023c2b..4c924125dd 100644 --- ql/src/test/results/clientpositive/vector_bucket.q.out +++ ql/src/test/results/clientpositive/vector_bucket.q.out @@ -19,7 +19,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.non_orc_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll') + Group By Vectorization: + groupByMode: HASH + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -87,7 +109,61 @@ STAGE PLANS: name: default.non_orc_table Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.non_orc_table + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 864 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + Group By Vectorization: + groupByMode: MERGEPARTIAL 
+ vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: select a, b from non_orc_table order by a PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_char_4.q.out ql/src/test/results/clientpositive/vector_char_4.q.out index 1c58fd209b..68817eb811 100644 --- ql/src/test/results/clientpositive/vector_char_4.q.out +++ ql/src/test/results/clientpositive/vector_char_4.q.out @@ -148,38 +148,60 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: char(20)), _col3 (type: char(30)), _col4 (type: char(20)), _col5 (type: char(20)), _col6 (type: char(50)) + outputColumnNames: ct, csi, ci, cb, cf, cd, cs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ct, 'hll'), compute_stats(csi, 'hll'), compute_stats(ci, 'hll'), compute_stats(cb, 'hll'), compute_stats(cf, 'hll'), compute_stats(cd, 'hll'), compute_stats(cs, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: 
struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -201,7 +223,12 @@ STAGE PLANS: name: default.char_lazy_binary_columnar Stage: Stage-2 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: ct, csi, ci, cb, cf, cd, cs + Column Types: char(10), char(10), char(20), char(30), char(20), char(20), char(50) + Table: default.char_lazy_binary_columnar Stage: Stage-3 Merge File Operator diff --git ql/src/test/results/clientpositive/vector_char_simple.q.out ql/src/test/results/clientpositive/vector_char_simple.q.out index 72ea17b724..2a19047339 100644 --- ql/src/test/results/clientpositive/vector_char_simple.q.out +++ ql/src/test/results/clientpositive/vector_char_simple.q.out @@ -211,7 +211,8 @@ PLAN VECTORIZATION: STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-0, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -244,11 +245,46 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: + Group By Vectorization: + groupByMode: HASH + vectorOutput: false + native: false + vectorProcessingMode: NONE Stage: Stage-0 Stage: Stage-2 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan Vectorization: + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: 
org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git ql/src/test/results/clientpositive/vector_groupby4.q.out ql/src/test/results/clientpositive/vector_groupby4.q.out index 34b571e32d..0f37041fab 100644 --- ql/src/test/results/clientpositive/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -34,7 +34,9 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -147,6 +149,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -159,7 +171,110 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + Group By Vectorization: + groupByMode: PARTIAL1 + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + groupByMode: FINAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) diff --git ql/src/test/results/clientpositive/vector_groupby6.q.out ql/src/test/results/clientpositive/vector_groupby6.q.out index bc86c15137..ec2d781be2 100644 --- ql/src/test/results/clientpositive/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -34,7 +34,9 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 - Stage-3 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-0, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -147,6 +149,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -159,7 +171,110 @@ STAGE PLANS: name: default.dest1 Stage: Stage-3 
- Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 'hll') + Group By Vectorization: + groupByMode: PARTIAL1 + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + groupByMode: FINAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) diff --git ql/src/test/results/clientpositive/vector_multi_insert.q.out ql/src/test/results/clientpositive/vector_multi_insert.q.out index 4013cd4694..d0e01d7d37 100644 --- ql/src/test/results/clientpositive/vector_multi_insert.q.out +++ ql/src/test/results/clientpositive/vector_multi_insert.q.out @@ -84,24 +84,16 @@ STAGE DEPENDENCIES: Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 - Stage-4 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-0, Stage-11, Stage-13 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 - Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 + Stage-1 depends on stages: Stage-3 + Stage-10 depends on stages: Stage-1, Stage-11, Stage-13 + Stage-11 depends on stages: Stage-3 + Stage-12 depends on stages: Stage-2, Stage-11, Stage-13 + Stage-2 depends on stages: Stage-3 + Stage-13 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -125,6 +117,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -140,6 +145,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn2 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (rn >= 1000) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -155,15 
+175,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Execution mode: vectorized + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 'hll') + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -185,7 +234,12 @@ STAGE PLANS: name: default.orc_rn1 Stage: Stage-4 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn1 Stage: Stage-5 Map Reduce @@ -217,15 +271,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -237,46 +282,55 @@ STAGE PLANS: name: default.orc_rn2 Stage: Stage-10 - Stats-Aggr Operator + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn2 Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator + Reduce Output Operator + sort order: + Statistics: 
Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Stage: Stage-12 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn3 Stage: Stage-2 Move Operator @@ -288,38 +342,40 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Stage: Stage-16 - Stats-Aggr Operator - - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 - - Stage: Stage-19 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 diff --git ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out index 70bce01c76..32d5581ce7 100644 --- ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -186,10 +186,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out index 2265cb83bc..43f53ae320 100644 --- ql/src/test/results/clientpositive/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/vector_outer_join2.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_outer_join3.q.out ql/src/test/results/clientpositive/vector_outer_join3.q.out index e4e482500f..d5c536c51e 100644 --- ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -191,10 +191,12 @@ POSTHOOK: Output: default@small_alltypesorc_a PREHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_a +PREHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a +POSTHOOK: Output: default@small_alltypesorc_a #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_a PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index 125ec07353..7858f180b6 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -201,10 +201,12 @@ POSTHOOK: Output: default@small_alltypesorc_b PREHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@small_alltypesorc_b +PREHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE small_alltypesorc_b COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: 
default@small_alltypesorc_b +POSTHOOK: Output: default@small_alltypesorc_b #### A masked pattern was here #### PREHOOK: query: select * from small_alltypesorc_b PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out index d611103eed..047f9cb707 100644 --- ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/vector_outer_reference_windowed.q.out @@ -95,28 +95,34 @@ c1 c2 PREHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@e011_01 +PREHOOK: Output: default@e011_01 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE e011_01 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_01 +POSTHOOK: Output: default@e011_01 #### A masked pattern was here #### _c0 _c1 PREHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@e011_02 +PREHOOK: Output: default@e011_02 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE e011_02 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_02 +POSTHOOK: Output: default@e011_02 #### A masked pattern was here #### _c0 _c1 PREHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS PREHOOK: type: QUERY PREHOOK: Input: default@e011_03 +PREHOOK: Output: default@e011_03 #### A masked pattern was here #### POSTHOOK: query: ANALYZE TABLE e011_03 COMPUTE STATISTICS FOR COLUMNS POSTHOOK: type: QUERY POSTHOOK: Input: default@e011_03 +POSTHOOK: Output: default@e011_03 #### A masked pattern was here #### _c0 _c1 PREHOOK: query: explain vectorization detail @@ -141,7 +147,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: e011_01 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -152,7 +158,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(c1) Group By Vectorization: @@ -330,7 +336,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: e011_01 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -341,7 +347,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(c1) Group By Vectorization: @@ -356,7 +362,7 @@ STAGE PLANS: keys: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) sort order: ++ @@ -366,7 +372,7 @@ STAGE PLANS: native: false nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized Map Vectorization: @@ -398,7 +404,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -422,7 +428,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized Map Vectorization: @@ -446,7 +452,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -466,14 +472,14 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sum_window_0 (type: decimal(35,2)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -535,19 +541,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: e011_01 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 
(type: decimal(15,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(15,2)) TableScan alias: e011_03 @@ -579,7 +585,7 @@ STAGE PLANS: 0 _col0 (type: decimal(15,2)) 1 _col0 (type: decimal(15,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) Group By Vectorization: @@ -591,7 +597,7 @@ STAGE PLANS: keys: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -615,7 +621,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized Map Vectorization: @@ -647,7 +653,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -671,7 +677,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized Map Vectorization: @@ -695,7 +701,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -715,14 +721,14 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sum_window_0 (type: decimal(35,2)) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator 
compressed: false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -788,19 +794,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: e011_01 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: decimal(15,2)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(15,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE TableScan alias: e011_03 Statistics: Num rows: 4 Data size: 36 Basic stats: COMPLETE Column stats: NONE @@ -832,7 +838,7 @@ STAGE PLANS: 0 _col0 (type: decimal(15,2)) 1 _col0 (type: decimal(15,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) Group By Vectorization: @@ -844,7 +850,7 @@ STAGE PLANS: keys: _col1 (type: decimal(15,2)), _col2 (type: decimal(15,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -868,7 +874,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized Map Vectorization: @@ -900,7 +906,7 @@ STAGE PLANS: keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -924,7 +930,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: 
COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized Map Vectorization: @@ -948,7 +954,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: decimal(15,2)), KEY.reducesinkkey0 (type: decimal(15,2)), VALUE._col0 (type: decimal(25,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -968,14 +974,14 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: sum_window_0 (type: decimal(35,2)) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1040,19 +1046,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: e011_01 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c1 (type: decimal(15,2)), c2 (type: decimal(15,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: decimal(15,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(15,2)) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(15,2)) TableScan alias: e011_03 @@ -1085,7 +1091,7 @@ STAGE PLANS: 0 _col0 (type: decimal(15,2)) 1 _col0 (type: decimal(15,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: corr(_col0, _col2) Group By Vectorization: @@ -1097,7 +1103,7 @@ STAGE PLANS: keys: _col1 (type: decimal(15,2)), _col3 (type: decimal(15,2)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1121,7 +1127,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-            Statistics: Num rows: 4 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 4 Data size: 13 Basic stats: COMPLETE Column stats: NONE
             value expressions: _col2 (type: struct)
       Execution mode: vectorized
       Map Vectorization:
@@ -1153,11 +1159,11 @@ STAGE PLANS:
           keys: KEY._col0 (type: decimal(15,2)), KEY._col1 (type: decimal(15,2))
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col1 (type: decimal(15,2)), _col0 (type: decimal(15,2)), _col2 (type: double)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
             PTF Operator
               Function definitions:
                   Input definition
@@ -1177,14 +1183,14 @@ STAGE PLANS:
                       name: sum
                       window function: GenericUDAFSumDouble
                       window frame: RANGE PRECEDING(MAX)~CURRENT
-              Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: sum_window_0 (type: double)
                 outputColumnNames: _col0
-                Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 2 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
index 283762a3eb..15e12f37b8 100644
--- ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
+++ ql/src/test/results/clientpositive/vector_tablesample_rows.q.out
@@ -114,44 +114,60 @@ STAGE PLANS:
             alias: alltypesorc
             Row Limit Per Split: 1
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
-            TableScan Vectorization:
-                native: true
-                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Select Operator
               expressions: 17.29 (type: decimal(18,9))
               outputColumnNames: _col0
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumns: [12]
-                  selectExpressions: ConstantVectorExpression(val 17.29) -> 12:decimal(18,9)
               Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
               File Output Operator
                 compressed: false
-                File Sink Vectorization:
-                    className: VectorFileSinkOperator
-                    native: false
                 Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
                 table:
                     input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                     output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                     serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                     name: default.decimal_2
-      Execution mode: vectorized
+              Select Operator
+                expressions: 17.29 (type: decimal(18,9))
+                outputColumnNames: t
+                Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: compute_stats(t, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: struct)
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-          rowBatchContext:
-              dataColumnCount: 12
-              includeColumns: []
-              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
-              partitionColumnCount: 0
-              scratchColumnTypeNames: decimal(18,9)
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -173,7 +189,12 @@ STAGE PLANS:
               name: default.decimal_2
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: t
+          Column Types: decimal(18,9)
+          Table: default.decimal_2
 
   Stage: Stage-3
     Merge File Operator
@@ -381,7 +402,8 @@ STAGE PLANS:
           isTemporary: true
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/vector_udf_character_length.q.out ql/src/test/results/clientpositive/vector_udf_character_length.q.out
index 81d801c930..42a79a07eb 100644
--- ql/src/test/results/clientpositive/vector_udf_character_length.q.out
+++ ql/src/test/results/clientpositive/vector_udf_character_length.q.out
@@ -71,7 +71,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
-      Execution mode: vectorized
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: len
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(len, 16)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -93,7 +118,12 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: len
+          Column Types: int
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/vector_udf_octet_length.q.out ql/src/test/results/clientpositive/vector_udf_octet_length.q.out
index c71cfef83f..c2c0626368 100644
--- ql/src/test/results/clientpositive/vector_udf_octet_length.q.out
+++ ql/src/test/results/clientpositive/vector_udf_octet_length.q.out
@@ -54,7 +54,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
-      Execution mode: vectorized
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: len
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(len, 16)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -76,7 +101,12 @@ STAGE PLANS:
               name: default.dest1
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: len
+          Column Types: int
+          Table: default.dest1
 
   Stage: Stage-3
     Map Reduce
diff --git ql/src/test/results/clientpositive/vector_varchar_4.q.out ql/src/test/results/clientpositive/vector_varchar_4.q.out
index 205c67a6ae..b1f4fb3b55 100644
--- ql/src/test/results/clientpositive/vector_varchar_4.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_4.q.out
@@ -148,38 +148,60 @@ STAGE PLANS:
           TableScan
             alias: vectortab2korc
             Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
             Select Operator
               expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50))
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19]
-                  selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar
               Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                File Sink Vectorization:
-                    className: VectorFileSinkOperator
-                    native: false
                 Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
                     name: default.varchar_lazy_binary_columnar
-      Execution mode: vectorized
+              Select Operator
+                expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: varchar(20)), _col3 (type: varchar(30)), _col4 (type: varchar(20)), _col5 (type: varchar(20)), _col6 (type: varchar(50))
+                outputColumnNames: vt, vsi, vi, vb, vf, vd, vs
+                Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(vt, 'hll'), compute_stats(vsi, 'hll'), compute_stats(vi, 'hll'), compute_stats(vb, 'hll'), compute_stats(vf, 'hll'), compute_stats(vd, 'hll'), compute_stats(vs, 'hll')
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct)
       Map Vectorization:
           enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: true
-          vectorized: true
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+          Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 3080 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -201,7 +223,12 @@ STAGE PLANS:
               name: default.varchar_lazy_binary_columnar
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: vt, vsi, vi, vb, vf, vd, vs
+          Column Types: varchar(10), varchar(10), varchar(20), varchar(30), varchar(20), varchar(20), varchar(50)
+          Table: default.varchar_lazy_binary_columnar
 
   Stage: Stage-3
     Merge File Operator
diff --git ql/src/test/results/clientpositive/vector_varchar_simple.q.out ql/src/test/results/clientpositive/vector_varchar_simple.q.out
index 0f8bdb58c3..345afb2370 100644
--- ql/src/test/results/clientpositive/vector_varchar_simple.q.out
+++ ql/src/test/results/clientpositive/vector_varchar_simple.q.out
@@ -275,7 +275,8 @@ PLAN VECTORIZATION:
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
-  Stage-2 depends on stages: Stage-0
+  Stage-2 depends on stages: Stage-0, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -309,7 +310,6 @@ STAGE PLANS:
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
               Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
-              TopN Hash Memory Usage: 0.1
               value expressions: _col0 (type: int)
       Execution mode: vectorized
       Map Vectorization:
@@ -344,6 +344,27 @@ STAGE PLANS:
               output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
               serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
               name: default.varchar_3
+          Select Operator
+            expressions: _col0 (type: varchar(25))
+            outputColumnNames: field
+            Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(field, 'hll')
+              Group By Vectorization:
+                  groupByMode: HASH
+                  vectorOutput: false
+                  native: false
+                  vectorProcessingMode: NONE
+                  projectedOutputColumns: null
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -356,7 +377,61 @@ STAGE PLANS:
           name: default.varchar_3
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: field
+          Column Types: varchar(25)
+          Table: default.varchar_3
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
+            Reduce Output Operator
+              sort order: 
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/vector_windowing_streaming.q.out ql/src/test/results/clientpositive/vector_windowing_streaming.q.out
index 0198e2b9a7..687a072883 100644
--- ql/src/test/results/clientpositive/vector_windowing_streaming.q.out
+++ ql/src/test/results/clientpositive/vector_windowing_streaming.q.out
@@ -763,7 +763,8 @@ STAGE PLANS:
           name: default.sD
 
   Stage: Stage-2
-    Stats-Aggr Operator
+    Stats Work
+      Basic Stats Work:
 
 PREHOOK: query: create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5