diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
index f7d165a..8a3647c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java
@@ -21,6 +21,7 @@
 import java.lang.management.ManagementFactory;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.logging.Log;
@@ -38,6 +39,7 @@
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -77,6 +79,12 @@ public void load(MapJoinTableContainer[] mapJoinTables,
     Map<Integer, String> parentToInput = desc.getParentToInput();
     Map<Integer, Long> parentKeyCounts = desc.getParentKeyCounts();
 
+    boolean isCrossProduct = false;
+    List<ExprNodeDesc> joinExprs = desc.getKeys().values().iterator().next();
+    if (joinExprs.size() == 0) {
+      isCrossProduct = true;
+    }
+
     boolean useOptimizedTables = HiveConf.getBoolVar(
         hconf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);
     boolean useHybridGraceHashJoin = desc.isHybridHashJoin();
@@ -184,11 +192,18 @@ public void load(MapJoinTableContainer[] mapJoinTables,
         }
       }
 
-      MapJoinTableContainer tableContainer = useOptimizedTables
-          ? (useHybridGraceHashJoin ? new HybridHashTableContainer(hconf, keyCount,
-              memory, desc.getParentDataSizes().get(pos), nwayConf)
-              : new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0))
-          : new HashMapWrapper(hconf, keyCount);
+      MapJoinTableContainer tableContainer;
+      if (useOptimizedTables) {
+        if (!useHybridGraceHashJoin || isCrossProduct) {
+          tableContainer = new MapJoinBytesTableContainer(hconf, valCtx, keyCount, 0);
+        } else {
+          tableContainer = new HybridHashTableContainer(hconf, keyCount, memory,
+              desc.getParentDataSizes().get(pos), nwayConf);
+        }
+      } else {
+        tableContainer = new HashMapWrapper(hconf, keyCount);
+      }
+      LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName());
 
       while (kvReader.next()) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index 8ea1879..504e986 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -632,6 +632,10 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo
         joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink);
     mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf,
         HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN));
+    List<ExprNodeDesc> joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next();
+    if (joinExprs.size() == 0) {  // In case of cross join, we disable hybrid grace hash join
+      mapJoinOp.getConf().setHybridHashJoin(false);
+    }
 
     Operator<? extends OperatorDesc> parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition);
diff --git a/ql/src/test/queries/clientpositive/cross_join.q b/ql/src/test/queries/clientpositive/cross_join.q
index 8eb949e..0c0a551 100644
--- a/ql/src/test/queries/clientpositive/cross_join.q
+++ b/ql/src/test/queries/clientpositive/cross_join.q
@@ -5,3 +5,12 @@ explain select src.key from src join src src2;
 explain select src.key from src cross join src src2;
 -- appending condition is allowed
 explain select src.key from src cross join src src2 on src.key=src2.key;
+
+set hive.execution.engine=tez;
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask.size=10000000;
+set hive.mapjoin.hybridgrace.hashtable=true;
+
+explain select src.key from src join src src2;
+explain select src.key from src cross join src src2;
+explain select src.key from src cross join src src2 on src.key=src2.key;
diff --git a/ql/src/test/results/clientpositive/tez/cross_join.q.out b/ql/src/test/results/clientpositive/tez/cross_join.q.out
index 6d10c1e..431aa63 100644
--- a/ql/src/test/results/clientpositive/tez/cross_join.q.out
+++ b/ql/src/test/results/clientpositive/tez/cross_join.q.out
@@ -203,3 +203,190 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: explain select src.key from src join src src2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src.key from src join src src2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0
+                        1
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Map 2
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: explain select src.key from src cross join src src2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src.key from src cross join src src2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0
+                        1
+                      outputColumnNames: _col0
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Map 2
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select src.key from src cross join src src2 on src.key=src2.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col0
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                        HybridGraceHashJoin: true
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.TextInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Map 2
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
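
For reference, the container selection introduced in HashTableLoader above can be read in isolation as the following standalone sketch. This is not Hive code: ContainerChoiceSketch, choose(), and the Container enum are illustrative stand-ins for the values the loader reads from MapJoinDesc.getKeys() and HiveConf; only the decision shape mirrors the patch, namely that an empty join-key expression list marks a cross product and disables the hybrid grace container.

// Standalone sketch of the table-container choice; all names are illustrative
// stand-ins, not Hive API calls.
import java.util.Collections;
import java.util.List;

public class ContainerChoiceSketch {

  enum Container { HYBRID_GRACE, OPTIMIZED_BYTES, HASH_MAP_WRAPPER }

  static Container choose(List<?> joinKeyExprs, boolean useOptimizedTables,
      boolean useHybridGraceHashJoin) {
    // A cross product has no join key expressions; since the hybrid grace hash
    // join partitions rows by key hash, the patch falls back to the plain
    // optimized bytes container in that case.
    boolean isCrossProduct = joinKeyExprs.isEmpty();
    if (!useOptimizedTables) {
      return Container.HASH_MAP_WRAPPER;
    }
    return (useHybridGraceHashJoin && !isCrossProduct)
        ? Container.HYBRID_GRACE : Container.OPTIMIZED_BYTES;
  }

  public static void main(String[] args) {
    // Cross join: empty key list falls back to OPTIMIZED_BYTES even with
    // hive.mapjoin.hybridgrace.hashtable=true.
    System.out.println(choose(Collections.emptyList(), true, true));
    // Equi-join: non-empty key list keeps HYBRID_GRACE.
    System.out.println(choose(Collections.singletonList("_col0"), true, true));
  }
}

Running the sketch prints OPTIMIZED_BYTES for the cross-join case and HYBRID_GRACE for the equi-join case, matching the plan shapes in the cross_join.q.out update above: the two cross-product plans carry no HybridGraceHashJoin marker, while the join with an ON condition shows HybridGraceHashJoin: true.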