diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java index 50b9c2bb6a..d970316699 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductHandler.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.plan.*; +import org.apache.tez.runtime.library.cartesianproduct.CartesianProductVertexManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; @@ -86,8 +87,21 @@ @Override public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { + HiveConf conf = pctx.getConf(); cartesianProductEdgeEnabled = - HiveConf.getBoolVar(pctx.getConf(), HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); + HiveConf.getBoolVar(conf, HiveConf.ConfVars.TEZ_CARTESIAN_PRODUCT_EDGE_ENABLED); + // if max parallelism isn't set by user in llap mode, set it to number of executors + if (cartesianProductEdgeEnabled + && HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap") + && conf.get(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_MAX_PARALLELISM) == null) { + LlapClusterStateForCompile llapInfo = LlapClusterStateForCompile.getClusterInfo(conf); + llapInfo.initClusterInfo(); + if (llapInfo.hasClusterInfo()) { + conf.setInt(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_MAX_PARALLELISM, + llapInfo.getKnownExecutorCount()); + } + } + TaskGraphWalker ogw = new TaskGraphWalker(this); ArrayList topNodes = new ArrayList(); diff --git ql/src/test/queries/clientpositive/vectorized_mapjoin3.q ql/src/test/queries/clientpositive/vectorized_mapjoin3.q index 989f4cf06c..16e10ad9cd 100644 --- ql/src/test/queries/clientpositive/vectorized_mapjoin3.q +++ ql/src/test/queries/clientpositive/vectorized_mapjoin3.q @@ -43,7 +43,8 @@ SELECT JOIN table_27 t2 ON (t1.t_id = t2.t_id and t1.f_id = t2.f_id) JOIN (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 - ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')); + ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) + ORDER BY t1.t_id ASC; set hive.explain.user=false; diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index 33126499b1..1daf5f2337 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -111,7 +111,7 @@ POSTHOOK: Input: default@table_19 POSTHOOK: Input: default@table_6 #### A masked pattern was here #### 418.9 -Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_0]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_0]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT t1.t_id, null @@ -120,6 +120,7 @@ PREHOOK: query: SELECT JOIN (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) + ORDER BY t1.t_id ASC PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Input: default@table_27 @@ -132,30 +133,31 @@ POSTHOOK: query: SELECT JOIN (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) + ORDER BY t1.t_id ASC POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Input: default@table_27 #### A masked pattern was here #### -J NULL -J NULL -I NULL -I NULL -H NULL -H NULL -G NULL -G NULL -F NULL -F NULL -E NULL -E NULL -D NULL -D NULL -C NULL -C NULL -B NULL -B NULL A NULL A NULL +B NULL +B NULL +C NULL +C NULL +D NULL +D NULL +E NULL +E NULL +F NULL +F NULL +G NULL +G NULL +H NULL +H NULL +I NULL +I NULL +J NULL +J NULL PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT t1.decimal0801_col FROM table_19 t1 diff --git ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out index d95728ae0f..6817861660 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin3.q.out @@ -111,7 +111,7 @@ POSTHOOK: Input: default@table_19 POSTHOOK: Input: default@table_6 #### A masked pattern was here #### 418.9 -Warning: Map Join MAPJOIN[25][bigTable=?] in task 'Stage-6:MAPRED' is a cross product +Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: SELECT t1.t_id, null @@ -120,6 +120,7 @@ PREHOOK: query: SELECT JOIN (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) + ORDER BY t1.t_id ASC PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Input: default@table_27 @@ -132,6 +133,7 @@ POSTHOOK: query: SELECT JOIN (SELECT 'TEST-1' id, 'TEST' r_type UNION ALL SELECT 'TEST-2' id, 'TEST' r_type) t3 ON (t3.r_type = t1.type and t3.id in ('TEST-1', 'TEST-2')) + ORDER BY t1.t_id ASC POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Input: default@table_27