diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java index 8c43774ab0..f268a131a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/CalciteSemanticException.java @@ -32,7 +32,7 @@ public enum UnsupportedFeature { Distinct_without_an_aggreggation, Duplicates_in_RR, Filter_expression_with_non_boolean_return_type, Having_clause_without_any_groupby, Invalid_column_reference, Invalid_decimal, - Less_than_equal_greater_than, Others, Same_name_in_multiple_expressions, + Others, Same_name_in_multiple_expressions, Schema_less_table, Select_alias_in_having_clause, Select_transform, Subquery, Table_sample_clauses, UDTF, Union_type, Unique_join, HighPrecissionTimestamp // CALCITE-1690 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 10f5eb3b5b..2e43c2ad71 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -478,12 +478,6 @@ public static SqlOperator getCalciteFn(String hiveUdfName, ImmutableList calciteArgTypes, RelDataType calciteRetType, boolean deterministic) throws CalciteSemanticException { - if (hiveUdfName != null && hiveUdfName.trim().equals("<=>")) { - // We can create Calcite IS_DISTINCT_FROM operator for this. But since our - // join reordering algo cant handle this anyway there is no advantage of - // this.So, bail out for now. - throw new CalciteSemanticException("<=> is not yet supported for cbo.", UnsupportedFeature.Less_than_equal_greater_than); - } SqlOperator calciteOp; CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType); switch (hiveUdfName) { diff --git a/ql/src/test/queries/clientpositive/join_nullsafe.q b/ql/src/test/queries/clientpositive/join_nullsafe.q index e96cc71671..029d7ba578 100644 --- a/ql/src/test/queries/clientpositive/join_nullsafe.q +++ b/ql/src/test/queries/clientpositive/join_nullsafe.q @@ -1,4 +1,6 @@ set hive.explain.user=false; +set hive.mapred.mode=nonstrict; + -- SORT_QUERY_RESULTS CREATE TABLE myinput1(key int, value int); diff --git a/ql/src/test/results/clientpositive/llap/join_nullsafe.q.out b/ql/src/test/results/clientpositive/llap/join_nullsafe.q.out index 71685aa9db..ae5ef7b038 100644 --- a/ql/src/test/results/clientpositive/llap/join_nullsafe.q.out +++ b/ql/src/test/results/clientpositive/llap/join_nullsafe.q.out @@ -14,6 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in8.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value @@ -27,7 +28,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -35,12 +36,14 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -48,12 +51,14 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int) - sort order: + - Map-reduce partition columns: value (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -63,18 +68,16 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 key (type: int) - 1 value (type: int) - nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 165 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = _col3) (type: boolean) + Statistics: Num rows: 4 Data size: 73 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 73 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -86,6 +89,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -105,6 +109,7 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key @@ -118,7 +123,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -129,31 +135,32 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: int) - sort order: + - Map-reduce partition columns: value (type: int) + sort order: Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -161,12 +168,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -175,24 +186,38 @@ STAGE PLANS: Merge Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: - 0 key (type: int) - 1 value (type: int) - 2 key (type: int) - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 165 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = _col3) (type: boolean) + Statistics: Num rows: 4 Data size: 73 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 73 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -200,6 +225,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key=c.key PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -210,6 +236,7 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 10 NULL NULL 10 10 NULL 100 100 100 100 100 100 +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key @@ -223,7 +250,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -231,38 +258,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int) - sort order: + - Map-reduce partition columns: value (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -273,23 +306,25 @@ STAGE PLANS: Inner Join 0 to 1 Inner Join 0 to 2 keys: - 0 key (type: int) - 1 value (type: int) - 2 key (type: int) - nullSafes: [true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 + 1 + 2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 27 Data size: 729 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 = _col5) and (_col0 = _col2)) (type: boolean) + Statistics: Num rows: 6 Data size: 162 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 6 Data size: 162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -297,6 +332,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value join myinput1 c on a.key<=>c.key PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -358,11 +394,16 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -373,11 +414,16 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int), key (type: int) - sort order: ++ - Map-reduce partition columns: value (type: int), key (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 4 @@ -388,11 +434,16 @@ STAGE PLANS: Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -403,19 +454,17 @@ STAGE PLANS: Inner Join 0 to 1 Inner Join 0 to 2 keys: - 0 key (type: int), value (type: int) - 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) - nullSafes: [true, false] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + 0 _col1 (type: int) + 1 _col0 (type: int) + 2 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col0 = _col4) and (_col0 = _col3)) (type: boolean) + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -437,6 +486,7 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 +Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value @@ -450,7 +500,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -458,35 +508,44 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: int), key (type: int) - sort order: ++ - Map-reduce partition columns: value (type: int), key (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int), value (type: int) - sort order: ++ - Map-reduce partition columns: key (type: int), value (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -497,23 +556,25 @@ STAGE PLANS: Inner Join 0 to 1 Inner Join 0 to 2 keys: - 0 key (type: int), value (type: int) - 1 value (type: int), key (type: int) - 2 key (type: int), value (type: int) - nullSafes: [true, true] - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 57 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 + 1 + 2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 27 Data size: 729 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col4) and (_col0 = _col5) and (_col1 = _col3) and (_col0 = _col2)) (type: boolean) + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -521,6 +582,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key join myinput1 c on a.key<=>c.key AND a.value<=>c.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -533,6 +595,7 @@ POSTHOOK: Input: default@myinput1 100 100 100 100 100 100 NULL 10 10 NULL NULL 10 NULL NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -553,6 +616,7 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -573,6 +637,7 @@ NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL 35 NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -594,6 +659,7 @@ NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL 35 NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -613,6 +679,7 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM myinput1 a JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -690,6 +757,7 @@ POSTHOOK: Lineage: smb_input1.key SIMPLE [(smb_input)smb_input.FieldSchema(name: POSTHOOK: Lineage: smb_input1.value SIMPLE [(smb_input)smb_input.FieldSchema(name:value, type:int, comment:null), ] POSTHOOK: Lineage: smb_input2.key SIMPLE [(smb_input)smb_input.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: smb_input2.value SIMPLE [(smb_input)smb_input.FieldSchema(name:value, type:int, comment:null), ] +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key PREHOOK: type: QUERY PREHOOK: Input: default@smb_input1 @@ -752,6 +820,7 @@ NULL 35 NULL NULL NULL NULL NULL 10050 NULL NULL NULL 35 NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key AND a.value <=> b.value PREHOOK: type: QUERY PREHOOK: Input: default@smb_input1 @@ -802,6 +871,7 @@ POSTHOOK: Input: default@smb_input1 NULL 10050 NULL 10050 NULL 35 NULL 35 NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key <=> b.key PREHOOK: type: QUERY PREHOOK: Input: default@smb_input1 @@ -864,6 +934,7 @@ NULL 35 NULL NULL NULL NULL NULL 10050 NULL NULL NULL 35 NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key PREHOOK: type: QUERY PREHOOK: Input: default@smb_input1 @@ -926,6 +997,7 @@ NULL 35 NULL NULL NULL NULL NULL 10050 NULL NULL NULL 35 NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key <=> b.key PREHOOK: type: QUERY PREHOOK: Input: default@smb_input1 @@ -988,6 +1060,7 @@ NULL 35 NULL NULL NULL NULL NULL 10050 NULL NULL NULL 35 NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key <=> b.value PREHOOK: type: QUERY PREHOOK: Input: default@smb_input1 @@ -1009,6 +1082,7 @@ NULL 35 12 NULL NULL 35 NULL NULL NULL NULL 12 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input2 b ON a.key <=> b.value PREHOOK: type: QUERY PREHOOK: Input: default@smb_input1 @@ -1030,6 +1104,7 @@ NULL 35 12 NULL NULL 35 NULL NULL NULL NULL 12 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input2 b ON a.key <=> b.value PREHOOK: type: QUERY PREHOOK: Input: default@smb_input1 @@ -1072,6 +1147,7 @@ NULL 35 12 NULL NULL 35 NULL NULL NULL NULL 12 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input2 b ON a.key <=> b.value PREHOOK: type: QUERY PREHOOK: Input: default@smb_input1 @@ -1112,6 +1188,7 @@ NULL NULL 80 10040 NULL NULL 80 10040 NULL NULL NULL 10050 NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value <=> b.value PREHOOK: type: QUERY PREHOOK: Input: default@smb_input2 @@ -1228,6 +1305,7 @@ NULL 10050 NULL 10050 NULL 35 NULL 35 NULL NULL 12 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(a) */ * FROM smb_input2 a RIGHT OUTER JOIN smb_input2 b ON a.value <=> b.value PREHOOK: type: QUERY PREHOOK: Input: default@smb_input2 @@ -1344,6 +1422,7 @@ NULL 10050 NULL 10050 NULL 35 NULL 35 NULL NULL 12 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a JOIN smb_input2 b ON a.value <=> b.value PREHOOK: type: QUERY PREHOOK: Input: default@smb_input2 @@ -1460,6 +1539,7 @@ NULL 10050 NULL 10050 NULL 35 NULL 35 NULL NULL 12 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT /*+ MAPJOIN(b) */ * FROM smb_input2 a LEFT OUTER JOIN smb_input2 b ON a.value <=> b.value PREHOOK: type: QUERY PREHOOK: Input: default@smb_input2 @@ -1576,6 +1656,7 @@ NULL 10050 NULL 10050 NULL 35 NULL 35 NULL NULL 12 NULL NULL NULL NULL NULL +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.key is NULL PREHOOK: type: QUERY POSTHOOK: query: explain select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.key is NULL @@ -1589,7 +1670,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1600,12 +1681,14 @@ STAGE PLANS: Filter Operator predicate: key is null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: null (type: int) - sort order: + - Map-reduce partition columns: null (type: int) + Select Operator + expressions: value (type: int) + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: int) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1614,14 +1697,16 @@ STAGE PLANS: alias: b Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is null (type: boolean) + predicate: (null = value) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: null (type: int) - sort order: + - Map-reduce partition columns: null (type: int) + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: int) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1631,18 +1716,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 null (type: int) - 1 null (type: int) - nullSafes: [true] - outputColumnNames: _col1, _col5 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: null (type: int), _col1 (type: int), _col5 (type: int), null (type: int) + expressions: null (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1654,6 +1738,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.key is NULL PREHOOK: type: QUERY PREHOOK: Input: default@myinput1