diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 5217208611..b4cc76ac49 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -68,6 +68,8 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -348,19 +350,54 @@ public long computeOnlineDataSizeOptimized(Statistics statistics) { public long computeOnlineDataSizeGeneric(Statistics statistics, long overHeadPerRow, long overHeadPerSlot) { - long onlineDataSize = 0; long numRows = statistics.getNumRows(); if (numRows <= 0) { numRows = 1; } long worstCaseNeededSlots = 1L << DoubleMath.log2(numRows / hashTableLoadFactor, RoundingMode.UP); - onlineDataSize += statistics.getDataSize(); + onlineDataSize += statistics.getDataSize() - hashTableDataSizeAdjustment(numRows, statistics.getColumnStats()); onlineDataSize += overHeadPerRow * statistics.getNumRows(); onlineDataSize += overHeadPerSlot * worstCaseNeededSlots; return onlineDataSize; } + /** + * In data calculation logic, we include some overhead due to java object refs, etc. + * However, this overhead may be different when storing values in hashtable for mapjoin. + * Hence, we calculate a size adjustment to the original data size for a given input. + */ + private static long hashTableDataSizeAdjustment(long numRows, List colStats) { + long result = 0; + + if (numRows <= 0 || colStats == null || colStats.isEmpty()) { + return result; + } + + for (ColStatistics cs : colStats) { + if (cs != null) { + String colTypeLowerCase = cs.getColumnType().toLowerCase(); + long nonNullCount = cs.getNumNulls() > 0 ? numRows - cs.getNumNulls() + 1 : numRows; + double overhead = 0; + if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME) + || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME) + || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)) { + overhead = JavaDataModel.get().lengthForStringOfLength(0); + } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) { + overhead = JavaDataModel.get().lengthForByteArrayOfSize(0); + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) || + colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME) || + colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME) || + colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) { + overhead = JavaDataModel.get().object(); + } + result = StatsUtils.safeAdd(StatsUtils.safeMult(nonNullCount, overhead), result); + } + } + + return result; + } + @VisibleForTesting public MemoryMonitorInfo getMemoryMonitorInfo( final HiveConf conf, diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out index 1a28ce84f7..a8bc8b887e 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out @@ -87,23 +87,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 4 (CUSTOM_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a @@ -116,13 +105,15 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -175,6 +166,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -368,23 +381,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 4 (CUSTOM_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a @@ -397,13 +399,15 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -456,6 +460,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -649,23 +675,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Map 4 (CUSTOM_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a @@ -678,13 +693,15 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -737,6 +754,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index bbc06fabb0..8fe30f0cc5 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -481,7 +481,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -505,7 +505,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: b @@ -518,36 +518,29 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -578,7 +571,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -602,7 +595,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 3 + Map 2 Map Operator Tree: TableScan alias: b @@ -615,36 +608,29 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(_col0) (type: double) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 382 Data size: 71052 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1636,9 +1622,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1662,7 +1647,7 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: b @@ -1690,31 +1675,25 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: string) - 1 _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + input vertices: + 1 Map 4 + Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index 0674ae4380..16274f39b7 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -57,9 +57,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -80,13 +80,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 4 => 25 + Estimated key counts: Map 2 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 input vertices: - 1 Map 4 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 39 Data size: 10374 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -153,7 +153,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 4 + Map 2 Map Operator Tree: TableScan alias: x @@ -232,7 +232,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 5 + Map 3 Map Operator Tree: TableScan alias: z @@ -247,15 +247,72 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 1 => 39 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col4 + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1_n21 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j1_n21 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n21 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Execution mode: llap LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### @@ -311,73 +368,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col4 - Position of Big Table: 1 - Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: string), _col4 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1_n21 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j1_n21 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n21 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 63 Data size: 16884 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Reducer 3 + Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -611,9 +602,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE) + Map 4 <- Map 3 (BROADCAST_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -695,7 +686,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 5 + Map 2 Map Operator Tree: TableScan alias: z @@ -774,7 +765,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [z] - Map 6 + Map 3 Map Operator Tree: TableScan alias: y @@ -789,15 +780,30 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - tag: 2 - value expressions: _col1 (type: string) - auto parallelism: true + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Estimated key counts: Map 1 => 25, Map 2 => 25 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col5 + input vertices: + 0 Map 1 + 1 Map 2 + Position of Big Table: 2 + Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string) + auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: @@ -853,7 +859,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 7 + Map 4 Map Operator Tree: TableScan alias: w @@ -868,15 +874,72 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 3 => 61 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col3, _col5 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1_n21 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j1_n21 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1_n21 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Execution mode: llap LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### @@ -931,97 +994,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [w] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5 - Position of Big Table: 2 - Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 61 Data size: 21655 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string) - auto parallelism: true - Reducer 3 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col3, _col5 - Position of Big Table: 0 - Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j1_n21 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j1_n21 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1_n21 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 99 Data size: 26334 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Reducer 4 + Reducer 5 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -1252,9 +1225,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1275,13 +1248,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 4 => 25 + Estimated key counts: Map 2 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 2 Position of Big Table: 0 Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -1348,7 +1321,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 4 + Map 2 Map Operator Tree: TableScan alias: x @@ -1427,7 +1400,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 5 + Map 3 Map Operator Tree: TableScan alias: z @@ -1442,15 +1415,72 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 1 => 39 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2_n1 + numFiles 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct dest_j2_n1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Execution mode: llap LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### @@ -1506,73 +1536,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Reducer 2 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Position of Big Table: 1 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2_n1 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest_j2_n1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Reducer 3 + Reducer 4 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -1804,9 +1768,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Map 4 <- Reducer 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1888,7 +1852,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 5 + Map 3 Map Operator Tree: TableScan alias: y @@ -1961,7 +1925,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 6 + Map 4 Map Operator Tree: TableScan alias: z @@ -1976,15 +1940,72 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true - Execution mode: vectorized, llap + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Reducer 2 => 64 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Position of Big Table: 1 + Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2_n1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j2_n1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Execution mode: llap LLAP IO: no inputs Path -> Alias: #### A masked pattern was here #### @@ -2062,73 +2083,7 @@ STAGE PLANS: tag: 0 value expressions: _col0 (type: string) auto parallelism: true - Reducer 3 - Execution mode: llap - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Position of Big Table: 0 - Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col3 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2_n1 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j2_n1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 104 Data size: 27664 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - auto parallelism: false - Reducer 4 + Reducer 5 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -2363,9 +2318,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2389,7 +2344,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 2 Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) @@ -2399,7 +2354,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: x @@ -2420,7 +2375,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: x @@ -2433,50 +2388,44 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 3 + LLAP IO: no inputs + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2662,9 +2611,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2688,7 +2637,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 2 Statistics: Num rows: 39 Data size: 6825 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) @@ -2698,7 +2647,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 4 + Map 2 Map Operator Tree: TableScan alias: x @@ -2719,7 +2668,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 5 + Map 3 Map Operator Tree: TableScan alias: y @@ -2732,50 +2681,44 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Reducer 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2_n1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2_n1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: key, value, val2 - Statistics: Num rows: 63 Data size: 16758 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll'), compute_stats(val2, 'hll') - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) - Reducer 3 + LLAP IO: no inputs + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out index cce6519831..79e242dc4a 100644 --- a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out +++ b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out @@ -559,7 +559,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -575,26 +575,15 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: y @@ -614,6 +603,28 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -642,7 +653,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -658,26 +669,14 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: y @@ -697,6 +696,28 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + 1 KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index 71f3e2cd42..b1eec43b72 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -3824,9 +3824,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) + Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -3928,15 +3928,41 @@ STAGE PLANS: expressions: k1 (type: string), k2 (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string) - auto parallelism: true + Map Join Operator + condition map: + Left Outer Join 0 to 1 + filter mappings: + 0 [1, 1] + filter predicates: + 0 {(_col2 = '1')} + 1 + Estimated key counts: Map 6 => 55 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5 + input vertices: + 1 Map 6 + Position of Big Table: 0 + Statistics: Num rows: 221 Data size: 49306 Basic stats: COMPLETE Column stats: COMPLETE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 49306 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 387 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + null sort order: aaaa + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 387 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + auto parallelism: true Execution mode: vectorized Path -> Alias: hdfs://### HDFS PATH ### [a] @@ -3993,7 +4019,7 @@ STAGE PLANS: name: default.dst_union22_n0 Truncated Path -> Alias: /dst_union22_n0/ds=1 [a] - Map 7 + Map 6 Map Operator Tree: TableScan alias: dst_union22_delta_n0 @@ -4169,40 +4195,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 6 - Needs Tagging: false - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(VALUE._col1 = '1')} - 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 - Statistics: Num rows: 221 Data size: 49306 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 221 Data size: 49306 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 387 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - null sort order: aaaa - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 387 Data size: 108402 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - auto parallelism: true Union 2 Vertex: Union 2