diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index d7f069eaa7..bbe7fb0697 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -1851,4 +1851,24 @@ public static void setupPermissionsForBuiltinUDFs(String whiteListStr, String blackListStr) { system.setupPermissionsForUDFs(whiteListStr, blackListStr); } + + /** + * Function to invert non-equi function texts + * @param funcText + */ + public static String invertFuncText(final String funcText) { + // Reverse the text + switch (funcText) { + case "<": + return ">"; + case "<=": + return ">="; + case ">": + return "<"; + case ">=": + return "<="; + default: + return null; // helps identify unsupported functions + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java index 2ebb149354..c010b18b61 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; import org.apache.hadoop.hive.serde2.AbstractSerDe; @@ -172,4 +173,15 @@ public default StorageHandlerInfo getStorageHandlerInfo(Table table) throws Meta default LockType getLockType(WriteEntity writeEntity){ return LockType.EXCLUSIVE; } + + /** + * Test if the storage handler allows the push-down of join filter predicate to prune further the splits. + * + * @param syntheticFilterPredicate Join filter predicate. + * @return true if supports dynamic split pruning for the given predicate. + */ + + default boolean addDynamicSplitPruningEdge(ExprNodeDesc syntheticFilterPredicate) { + return false; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index a1401aac72..439fb75aca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -80,6 +80,7 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.slf4j.Logger; @@ -188,7 +189,12 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje // all partitions have been statically removed LOG.debug("No partition pruning necessary."); } - } else { + } else if (table.isNonNative() && + table.getStorageHandler().addDynamicSplitPruningEdge(desc.getPredicate())){ + generateEventOperatorPlan(ctx, parseContext, ts, column, + table.getCols().stream().filter(e -> e.getName().equals(column)). + map(e -> e.getType()).findFirst().get()); + } else { // semijoin LOG.debug("Column " + column + " is not a partition column"); if (semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(), ts, ctx) && ts.getConf().getFilterExpr() != null) { @@ -291,7 +297,7 @@ private boolean disableSemiJoinOptDueToExternalTable(HiveConf conf, TableScanOpe disableSemiJoin = true; } else { // Check the other side of the join, using the DynamicListContext - ExprNodeDesc exprNodeDesc = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); + ExprNodeDesc exprNodeDesc = ctx.getKeyCol(); ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc); if (colExpr != null) { @@ -317,7 +323,7 @@ private boolean disableSemiJoinOptDueToExternalTable(HiveConf conf, TableScanOpe // Given a key, find the corresponding column name. private boolean getColumnInfo(DynamicListContext ctx, StringBuilder internalColName, StringBuilder colName, StringBuilder tabAlias) { - ExprNodeDesc exprNodeDesc = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); + ExprNodeDesc exprNodeDesc = ctx.getKeyCol(); ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(exprNodeDesc); if (colExpr == null) { @@ -435,7 +441,7 @@ private void generateEventOperatorPlan(DynamicListContext ctx, ParseContext pars Operator parentOfRS = ctx.generator.getParentOperators().get(0); // we need the expr that generated the key of the reduce sink - ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); + ExprNodeDesc key = ctx.getKeyCol(); // we also need the expr for the partitioned table ExprNodeDesc partKey = ctx.parent.getChildren().get(0); @@ -524,11 +530,18 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex TableScanOperator ts, String keyBaseAlias, String internalColName, String colName, SemiJoinHint sjHint) throws SemanticException { + // Semijoin reduction for non-equi join not yet supported, check for it + ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) ctx.parent; + if (!(funcDesc.getGenericUDF() instanceof GenericUDFIn)) { + LOG.info("Semijoin reduction for non-equi joins is currently disabled."); + return false; + } + // we will put a fork in the plan at the source of the reduce sink Operator parentOfRS = ctx.generator.getParentOperators().get(0); // we need the expr that generated the key of the reduce sink - ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); + ExprNodeDesc key = ctx.getKeyCol(); assert colName != null; // Fetch the TableScan Operator. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java index f8c7e18eb1..94879c9529 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java @@ -779,6 +779,15 @@ public DynamicListContext(ExprNodeDynamicListDesc desc, ExprNodeDesc parent, this.grandParent = grandParent; this.generator = generator; } + + public ExprNodeDesc getKeyCol() { + ExprNodeDesc keyCol = desc.getTarget(); + if (keyCol != null) { + return keyCol; + } + + return generator.getConf().getKeyCols().get(desc.getKeyIndex()); + } } public static class DynamicPartitionPrunerContext implements NodeProcessorCtx, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDynamicListDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDynamicListDesc.java index 676dfc9421..9bc5796d83 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDynamicListDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDynamicListDesc.java @@ -31,15 +31,27 @@ Operator source; int keyIndex; + // Target name, if key is not set + ExprNodeDesc target; public ExprNodeDynamicListDesc() { } + // keyIndex is not used public ExprNodeDynamicListDesc(TypeInfo typeInfo, Operator source, - int keyIndex) { + int keyIndex, ExprNodeDesc target) { super(typeInfo); this.source = source; this.keyIndex = keyIndex; + this.target = target; + } + + public ExprNodeDynamicListDesc(TypeInfo typeInfo, Operator source, + int keyIndex) { + super(typeInfo); + this.source = source; + this.keyIndex = keyIndex; + this.target = null; } public void setSource(Operator source) { @@ -58,9 +70,13 @@ public int getKeyIndex() { return this.keyIndex; } + public ExprNodeDesc getTarget() { + return target; + } + @Override public ExprNodeDesc clone() { - return new ExprNodeDynamicListDesc(typeInfo, source, keyIndex); + return new ExprNodeDynamicListDesc(typeInfo, source, keyIndex, target); } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java index e97e44796f..dcecbf432e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -179,66 +180,149 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, List sourceKeys = source.getConf().getKeyCols(); List targetKeys = target.getConf().getKeyCols(); - if (sourceKeys.size() < 1) { - continue; - } - ExprNodeDesc syntheticExpr = null; - for (int i = 0; i < sourceKeys.size(); ++i) { - final ExprNodeDesc sourceKey = sourceKeys.get(i); + if (sourceKeys.size() > 0) { + for (int i = 0; i < sourceKeys.size(); ++i) { + final ExprNodeDesc sourceKey = sourceKeys.get(i); - List inArgs = new ArrayList<>(); - inArgs.add(sourceKey); + List inArgs = new ArrayList<>(); + inArgs.add(sourceKey); - ExprNodeDynamicListDesc dynamicExpr = + ExprNodeDynamicListDesc dynamicExpr = new ExprNodeDynamicListDesc(targetKeys.get(i).getTypeInfo(), target, i); - inArgs.add(dynamicExpr); + inArgs.add(dynamicExpr); - ExprNodeDesc syntheticInExpr = + ExprNodeDesc syntheticInExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("in") - .getGenericUDF(), inArgs); - if (LOG.isDebugEnabled()) { - LOG.debug("Synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + syntheticInExpr + ")"); - } + .getGenericUDF(), inArgs); + if (LOG.isDebugEnabled()) { + LOG.debug("Synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + syntheticInExpr + ")"); + } - List andArgs = new ArrayList<>(); - if (syntheticExpr != null) { - andArgs.add(syntheticExpr); - } - andArgs.add(syntheticInExpr); - - if(sCtx.isExtended()) { - // Backtrack - List newExprs = createDerivatives(target.getParentOperators().get(0), targetKeys.get(i), sourceKey); - if (!newExprs.isEmpty()) { - if (LOG.isDebugEnabled()) { - for (ExprNodeDesc expr : newExprs) { - LOG.debug("Additional synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + expr + ")"); + List andArgs = new ArrayList<>(); + if (syntheticExpr != null) { + andArgs.add(syntheticExpr); + } + andArgs.add(syntheticInExpr); + + if (sCtx.isExtended()) { + // Backtrack + List newExprs = createDerivatives(target.getParentOperators().get(0), targetKeys.get(i), sourceKey); + if (!newExprs.isEmpty()) { + if (LOG.isDebugEnabled()) { + for (ExprNodeDesc expr : newExprs) { + LOG.debug("Additional synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + expr + ")"); + } } + andArgs.addAll(newExprs); } - andArgs.addAll(newExprs); } - } - if (andArgs.size() < 2) { - syntheticExpr = syntheticInExpr; - } else { - // Create AND expression - syntheticExpr = + if (andArgs.size() < 2) { + syntheticExpr = syntheticInExpr; + } else { + // Create AND expression + syntheticExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and") - .getGenericUDF(), andArgs); + .getGenericUDF(), andArgs); + } } } - Operator newFilter = createFilter(source, parent, parentRS, syntheticExpr); - parent = newFilter; + // Handle non-equi joins like <, <=, >, and >= + List residualFilters = join.getConf().getResidualFilterExprs(); + if (residualFilters != null && residualFilters.size() != 0 && + !(srcPos > 1 || targetPos > 1)) { // Either srcPos or targetPos is larger than 1, making this filter a complex one. + + for (ExprNodeDesc filter : residualFilters) { + if (!(filter instanceof ExprNodeGenericFuncDesc)) { + continue; + } + + ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) filter; + // filter should be of type <, >, <= or >= + if (getFuncText(funcDesc.getFuncText(), 1) == null) { + // unsupported + continue; + } + + final ExprNodeDesc sourceChild = funcDesc.getChildren().get(srcPos); + final ExprNodeDesc targetChild = funcDesc.getChildren().get(targetPos); + if (!(sourceChild instanceof ExprNodeColumnDesc && + targetChild instanceof ExprNodeColumnDesc)) { + continue; + } + // Create non-equi function. + List funcArgs = new ArrayList<>(); + ExprNodeDesc sourceKey = getRSColExprFromResidualFilter(sourceChild, join); + funcArgs.add(sourceKey); + final ExprNodeDynamicListDesc dynamicExpr = + new ExprNodeDynamicListDesc(targetChild.getTypeInfo(), target, 0, + getRSColExprFromResidualFilter(targetChild, join)); + funcArgs.add(dynamicExpr); + ExprNodeDesc funcExpr = + ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo(getFuncText(funcDesc.getFuncText(), srcPos)).getGenericUDF(), funcArgs); + + // TODO : deduplicate the code below. + if (LOG.isDebugEnabled()) { + LOG.debug(" Non-Equi Join Predicate " + funcExpr); + } + + List andArgs = new ArrayList<>(); + if (syntheticExpr != null) { + andArgs.add(syntheticExpr); + } + andArgs.add(funcExpr); + + // TODO : HIVE-21098 : Support for extended predicates + if (andArgs.size() < 2) { + syntheticExpr = funcExpr; + } else { + syntheticExpr = + ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and").getGenericUDF(), andArgs); + } + } + } + + if (syntheticExpr != null) { + Operator newFilter = createFilter(source, parent, parentRS, syntheticExpr); + parent = newFilter; + } } return null; } + private ExprNodeDesc getRSColExprFromResidualFilter(ExprNodeDesc childExpr, CommonJoinOperator join) { + ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(childExpr); + + final String joinColName = colExpr.getColumn(); + // use name to get the alias pos of parent and name in parent + final int aliasPos = join.getConf().getReversedExprs().get(joinColName); + final ExprNodeDesc rsColExpr = join.getColumnExprMap().get(joinColName); + + // Get the correct parent + final ReduceSinkOperator parentRS = (ReduceSinkOperator) (join.getParentOperators().get(aliasPos)); + + // Fetch the colExpr from parent + return parentRS.getColumnExprMap().get( + ExprNodeDescUtils.extractColName(rsColExpr)); + } + + // This function serves two purposes + // 1. As the name suggests, provides inverted function text for a given function text + // 2. If inversion fails, it can be inferred that the given function is not supported. + String getFuncText(String funcText, final int srcPos) { + if (srcPos == 0) { + return funcText; + } + + return FunctionRegistry.invertFuncText(funcText); + } + + // calculate filter propagation directions for each alias // L<->R for inner/semi join, L<-R for left outer join, R<-L for right outer // join diff --git a/ql/src/test/results/clientpositive/llap/cross_prod_1.q.out b/ql/src/test/results/clientpositive/llap/cross_prod_1.q.out index f900a01be4..6672fb5da3 100644 --- a/ql/src/test/results/clientpositive/llap/cross_prod_1.q.out +++ b/ql/src/test/results/clientpositive/llap/cross_prod_1.q.out @@ -210,7 +210,7 @@ POSTHOOK: Input: default@x_n0 114 val_114 111 val_111 114 val_114 113 val_113 114 val_114 114 val_114 -Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from X_n0 as A join X_n0 as B on A.key 200000) and (d_moy = 2) and (d_year = 2000) and d_month_seq is not null) TableScan [TS_16] (rows=28 width=12) default@x1_date_dim,x1_date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Select Operator [SEL_123] (rows=28 width=8) + <-Select Operator [SEL_124] (rows=28 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_122] (rows=28 width=8) + Filter Operator [FIL_123] (rows=28 width=8) predicate:(d_date_sk is not null and d_month_seq is not null) TableScan [TS_13] (rows=28 width=8) default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - Dynamic Partitioning Event Operator [EVENT_128] (rows=2 width=8) - Group By Operator [GBY_127] (rows=2 width=8) + Dynamic Partitioning Event Operator [EVENT_129] (rows=2 width=8) + Group By Operator [GBY_128] (rows=2 width=8) Output:["_col0"],keys:_col0 - Select Operator [SEL_126] (rows=28 width=8) + Select Operator [SEL_127] (rows=28 width=8) Output:["_col0"] - Please refer to the previous Map Join Operator [MAPJOIN_124] - <-Map Join Operator [MAPJOIN_131] (rows=370371 width=4) - Conds:RS_30._col0=SEL_130._col0(Inner),Output:["_col6"] + Please refer to the previous Map Join Operator [MAPJOIN_125] + <-Map Join Operator [MAPJOIN_132] (rows=370371 width=4) + Conds:RS_30._col0=SEL_131._col0(Inner),Output:["_col6"] <-Map 1 [BROADCAST_EDGE] llap BROADCAST [RS_30] PartitionCols:_col0 - Map Join Operator [MAPJOIN_100] (rows=6 width=228) - Conds:SEL_2._col1=RS_113._col1(Inner),Output:["_col0","_col2","_col3"],residual filter predicates:{(_col2 > _col3)} + Map Join Operator [MAPJOIN_101] (rows=6 width=228) + Conds:SEL_2._col1=RS_114._col1(Inner),Output:["_col0","_col2","_col3"],residual filter predicates:{(_col2 > _col3)} <-Reducer 3 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_113] + BROADCAST [RS_114] PartitionCols:_col1 - Select Operator [SEL_112] (rows=1 width=197) + Select Operator [SEL_113] (rows=1 width=197) Output:["_col0","_col1"] - Group By Operator [GBY_111] (rows=1 width=197) + Group By Operator [GBY_112] (rows=1 width=197) Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 <-Map 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_110] + SHUFFLE [RS_111] PartitionCols:_col0 - Group By Operator [GBY_109] (rows=1 width=197) + Group By Operator [GBY_110] (rows=1 width=197) Output:["_col0","_col1"],aggregations:["min(i_current_price)"],keys:i_category - Filter Operator [FIL_108] (rows=18 width=197) + Filter Operator [FIL_109] (rows=18 width=197) predicate:i_category is not null TableScan [TS_3] (rows=18 width=197) default@x1_item,j,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_current_price"] @@ -306,9 +306,9 @@ Stage-0 predicate:(i_category is not null and i_item_sk is not null) TableScan [TS_0] (rows=18 width=201) default@x1_item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category","i_current_price"] - <-Select Operator [SEL_130] (rows=123457 width=8) + <-Select Operator [SEL_131] (rows=123457 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_129] (rows=123457 width=8) + Filter Operator [FIL_130] (rows=123457 width=8) predicate:ss_item_sk is not null TableScan [TS_10] (rows=123457 width=8) default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"] diff --git a/ql/src/test/results/clientpositive/llap/semijoin.q.out b/ql/src/test/results/clientpositive/llap/semijoin.q.out index 00bc6cec55..e8f72538a1 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin.q.out @@ -2921,7 +2921,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2_n33 POSTHOOK: Input: default@t3_n12 #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, value from src outr left semi join (select a.key, b.value from src a join (select distinct value from src) b on a.value > b.value group by a.key, b.value) inr on outr.key=inr.key and outr.value=inr.value @@ -3063,7 +3063,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[50][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select key, value from src outr left semi join (select a.key, b.value from src a join (select distinct value from src) b on a.value > b.value group by a.key, b.value) inr on outr.key=inr.key and outr.value=inr.value diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 07cc4dbabc..b9b489f24b 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -4703,7 +4703,7 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4871,7 +4871,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[49][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[52][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 29d8bbfb48..b175aa97be 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -7191,7 +7191,7 @@ PREHOOK: query: drop table t1_n0 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table t1_n0 POSTHOOK: type: DROPTABLE -Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from src b where b.key not in @@ -7410,7 +7410,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[53][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from src b where b.key not in diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 1cf281afbd..2a684018df 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -79,7 +79,7 @@ POSTHOOK: Lineage: part_null_n0.p_partkey SCRIPT [] POSTHOOK: Lineage: part_null_n0.p_retailprice SCRIPT [] POSTHOOK: Lineage: part_null_n0.p_size SCRIPT [] POSTHOOK: Lineage: part_null_n0.p_type SCRIPT [] -Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_size > (select avg(p_size) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -184,7 +184,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_size > (select avg(p_size) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -981,7 +981,7 @@ POSTHOOK: Input: default@part 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl 49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product PREHOOK: query: explain select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -1181,7 +1181,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size > (select first_value(p_size) over(partition by p_mfgr order by p_size) as fv from part order by fv limit 1) @@ -1956,7 +1956,7 @@ POSTHOOK: Input: default@part_null_n0 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2123,7 +2123,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part where p_size >= (select min(p_size) from part_null_n0 where part_null_n0.p_type = part.p_type) AND p_retailprice <= (select max(p_retailprice) from part_null_n0) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2477,8 +2477,8 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -2663,8 +2663,8 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select key, count(*) from src where value <> (select max(value) from src) group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -2683,7 +2683,7 @@ POSTHOOK: Input: default@src 468 4 469 5 489 4 -Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select sum(p_retailprice) from part group by p_type having sum(p_retailprice) > (select max(pp.p_retailprice) from part pp) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2761,10 +2761,10 @@ STAGE PLANS: Select Operator expressions: _col1 (type: double) outputColumnNames: _col1 - Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) Reducer 3 Execution mode: llap @@ -2808,7 +2808,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select sum(p_retailprice) from part group by p_type having sum(p_retailprice) > (select max(pp.p_retailprice) from part pp) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -5357,7 +5357,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[33][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from emps_n4 where deptno <> (select count(deptno) from depts_n3 where depts_n3.name = emps_n4.name) and empno > (select count(name) from depts_n3) PREHOOK: type: QUERY PREHOOK: Input: default@depts_n3 @@ -5389,7 +5389,7 @@ POSTHOOK: query: drop table EMPS_n4 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@emps_n4 POSTHOOK: Output: default@emps_n4 -Warning: Shuffle Join MERGEJOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src @@ -5512,7 +5512,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[19][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select key, count(*) from src group by key @@ -5537,7 +5537,7 @@ POSTHOOK: Input: default@src 468 4 469 5 489 4 -Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: explain select key, value, count(*) from src b @@ -5719,7 +5719,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[46][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[51][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: select key, value, count(*) from src b where b.key in (select key from src where src.key > '8') @@ -5736,7 +5736,7 @@ having count(*) > (select count(*) from src s1 where s1.key > '9' ) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part where p_size > (select max(p_size) from part group by p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -6903,7 +6903,7 @@ POSTHOOK: query: drop table tempty_n0 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tempty_n0 POSTHOOK: Output: default@tempty_n0 -Warning: Shuffle Join MERGEJOIN[18][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 group by 4) PREHOOK: type: QUERY @@ -7032,7 +7032,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[25][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src group by key having count(*) > (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY @@ -7183,7 +7183,7 @@ POSTHOOK: query: CREATE TABLE `date_dim`( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@date_dim -Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain cbo with avg_sales as (select avg(quantity*list_price) average_sales from (select ss_quantity quantity @@ -7226,7 +7226,7 @@ HiveProject(ss_sold_date_sk=[$0], ss_quantity=[$1], ss_list_price=[$2]) HiveFilter(condition=[AND(BETWEEN(false, $1, 1999, 2001), IS NOT NULL($0))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Shuffle Join MERGEJOIN[68][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[73][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain cbo with avg_sales as (select avg(quantity*list_price) over( partition by list_price) average_sales from (select ss_quantity quantity diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 6255abdd70..9650815b80 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -4239,7 +4239,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### true -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select o.p_size, (select count(distinct p_type) from part p where p.p_partkey = o.p_partkey) tmp FROM part o right join (select * from part where p_size > (select avg(p_size) from part)) t on t.p_partkey = o.p_partkey PREHOOK: type: QUERY @@ -4423,7 +4423,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[44][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select o.p_size, (select count(distinct p_type) from part p where p.p_partkey = o.p_partkey) tmp FROM part o right join (select * from part where p_size > (select avg(p_size) from part)) t on t.p_partkey = o.p_partkey PREHOOK: type: QUERY @@ -5202,7 +5202,7 @@ POSTHOOK: Input: default@part 6 28 6 28 7 28 -Warning: Shuffle Join MERGEJOIN[137][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[135][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type @@ -5319,7 +5319,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - filterExpr: p_partkey is not null (type: boolean) + filterExpr: (p_partkey is not null or p_partkey is not null or p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_partkey is not null (type: boolean) @@ -5334,6 +5334,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: p_partkey (type: int) @@ -5346,6 +5349,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_partkey (type: int) mode: hash @@ -5564,7 +5570,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[137][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[135][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select t1.p_size, (select count(*) from part t2 where t2.p_partkey = t1.p_partkey group by t2.p_partkey), (select count(*) from part p, part pp where p.p_size = pp.p_size and p.p_type = pp.p_type