diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 093b4a73f3..535a56b95b 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2140,7 +2140,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "is also left in the operator tree at the original place."), HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true, "Whether to transform OR clauses in Filter operators into IN clauses"), - HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31, + HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 2, "Minimum number of OR clauses needed to transform into IN clauses"), HIVECOUNTDISTINCTOPTIMIZER("hive.optimize.countdistinct", true, "Whether to transform count distinct into two stages"), diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 97e405970f..c50c698046 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -3669,6 +3669,10 @@ private long getIntFamilyScalarAsLong(ExprNodeConstantDesc constDesc) return (Short) o; } else if (o instanceof Integer) { return (Integer) o; + } else if (o instanceof Short) { + return (Short) o; + } else if (o instanceof Byte) { + return (Byte) o; } else if (o instanceof Long) { return (Long) o; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index f544f58632..bc479697f2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ 
-340,13 +340,19 @@ private RexNode convert(ExprNodeGenericFuncDesc func) throws SemanticException { } else if (HiveFloorDate.ALL_FUNCTIONS.contains(calciteOp)) { // If it is a floor operator, we need to rewrite it childRexNodeLst = rewriteFloorDateChildren(calciteOp, childRexNodeLst); - } else if (calciteOp.getKind() == SqlKind.IN && childRexNodeLst.size() == 2 && isAllPrimitive) { - // if it is a single item in an IN clause, transform A IN (B) to A = B - // from IN [A,B] => EQUALS [A,B] - // except complex types - calciteOp = - SqlFunctionConverter.getCalciteOperator("=", FunctionRegistry.getFunctionInfo("=") - .getGenericUDF(), argTypeBldr.build(), retType); + } else if (calciteOp.getKind() == SqlKind.IN && isAllPrimitive) { + if (childRexNodeLst.size() == 2) { + // if it is a single item in an IN clause, transform A IN (B) to A = B + // from IN [A,B] => EQUALS [A,B] + // except complex types + calciteOp = SqlStdOperatorTable.EQUALS; + } else if (RexUtil.isReferenceOrAccess(childRexNodeLst.get(0), true)) { + // if there is more than a single item in an IN clause, + // transform from IN [A,B,C] => OR [EQUALS [A,B], EQUALS [A,C]] + // except complex types + childRexNodeLst = rewriteInClauseChildren(calciteOp, childRexNodeLst); + calciteOp = SqlStdOperatorTable.OR; + } } expr = cluster.getRexBuilder().makeCall(retType, calciteOp, childRexNodeLst); } else { @@ -377,8 +383,9 @@ private boolean castExprUsingUDFBridge(GenericUDF gUDF) { if (udfClassName.equals("UDFToBoolean") || udfClassName.equals("UDFToByte") || udfClassName.equals("UDFToDouble") || udfClassName.equals("UDFToInteger") || udfClassName.equals("UDFToLong") || udfClassName.equals("UDFToShort") - || udfClassName.equals("UDFToFloat")) + || udfClassName.equals("UDFToFloat")) { castExpr = true; + } } } } @@ -527,6 +534,19 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c return newChildRexNodeLst; } + private List rewriteInClauseChildren(SqlOperator op, List childRexNodeLst) + throws
SemanticException { + assert op.getKind() == SqlKind.IN; + RexNode firstPred = childRexNodeLst.get(0); + List newChildRexNodeLst = new ArrayList(); + for (int i = 1; i < childRexNodeLst.size(); i++) { + newChildRexNodeLst.add( + cluster.getRexBuilder().makeCall( + SqlStdOperatorTable.EQUALS, firstPred, childRexNodeLst.get(i))); + } + return newChildRexNodeLst; + } + private static boolean checkForStatefulFunctions(List list) { for (ExprNodeDesc node : list) { if (node instanceof ExprNodeGenericFuncDesc) { @@ -562,8 +582,9 @@ private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException { } } - if (noInp > 1) + if (noInp > 1) { throw new RuntimeException("Ambiguous column mapping"); + } } return ctxLookingFor; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index fa941a1b25..1a86294f2c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.parse; import java.math.BigDecimal; - import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -72,6 +71,7 @@ import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFNvl; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; @@ -88,6 +88,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; @@ -357,7 +358,9 @@ public static ExprNodeConstantDesc createDecimal(String strVal, boolean notNull) // not be desirable for the literals; however, this used to be the default behavior // for explicit decimal literals (e.g. 1.0BD), so we keep this behavior for now. HiveDecimal hd = HiveDecimal.create(strVal); - if (notNull && hd == null) return null; + if (notNull && hd == null) { + return null; + } int prec = 1; int scale = 0; if (hd != null) { @@ -752,7 +755,7 @@ private static ExprNodeConstantDesc toPrimitiveConstDesc(ColumnInfo colInfo, Obj constantExpr.setFoldedFromCol(colInfo.getInternalName()); return constantExpr; } - + private static ExprNodeConstantDesc toListConstDesc(ColumnInfo colInfo, ObjectInspector inspector, ObjectInspector listElementOI) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector)listElementOI; @@ -761,12 +764,12 @@ private static ExprNodeConstantDesc toListConstDesc(ColumnInfo colInfo, ObjectIn for (Object o : values) { constant.add(poi.getPrimitiveJavaObject(o)); } - + ExprNodeConstantDesc constantExpr = new ExprNodeConstantDesc(colInfo.getType(), constant); constantExpr.setFoldedFromCol(colInfo.getInternalName()); return constantExpr; } - + private static ExprNodeConstantDesc toMapConstDesc(ColumnInfo colInfo, ObjectInspector inspector, ObjectInspector keyOI, ObjectInspector valueOI) { PrimitiveObjectInspector keyPoi = (PrimitiveObjectInspector)keyOI; @@ -776,7 +779,7 @@ private static ExprNodeConstantDesc toMapConstDesc(ColumnInfo colInfo, ObjectIns for (Map.Entry e : values.entrySet()) { constant.put(keyPoi.getPrimitiveJavaObject(e.getKey()), valuePoi.getPrimitiveJavaObject(e.getValue())); } - + ExprNodeConstantDesc constantExpr = new 
ExprNodeConstantDesc(colInfo.getType(), constant); constantExpr.setFoldedFromCol(colInfo.getInternalName()); return constantExpr; @@ -791,7 +794,7 @@ private static ExprNodeConstantDesc toStructConstDesc(ColumnInfo colInfo, Object PrimitiveObjectInspector fieldPoi = (PrimitiveObjectInspector) fields.get(i).getFieldObjectInspector(); constant.add(fieldPoi.getPrimitiveJavaObject(value)); } - + ExprNodeConstantDesc constantExpr = new ExprNodeConstantDesc(colInfo.getType(), constant); constantExpr.setFoldedFromCol(colInfo.getInternalName()); return constantExpr; @@ -1030,8 +1033,9 @@ protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, isList); } else if (funcText.equals("[")) { // "[]" : LSQUARE/INDEX Expression - if (!ctx.getallowIndexExpr()) + if (!ctx.getallowIndexExpr()) { throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); + } assert (children.size() == 2); @@ -1134,47 +1138,44 @@ protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, && children.get(1) instanceof ExprNodeColumnDesc) || (children.get(0) instanceof ExprNodeColumnDesc && children.get(1) instanceof ExprNodeConstantDesc))) { - int constIdx = - children.get(0) instanceof ExprNodeConstantDesc ? 
0 : 1; - - String constType = children.get(constIdx).getTypeString().toLowerCase(); - String columnType = children.get(1 - constIdx).getTypeString().toLowerCase(); - final PrimitiveTypeInfo colTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(columnType); - // Try to narrow type of constant - Object constVal = ((ExprNodeConstantDesc) children.get(constIdx)).getValue(); - try { - if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) { - children.set(constIdx, new ExprNodeConstantDesc(new Integer(constVal.toString()))); - } else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) { - children.set(constIdx, new ExprNodeConstantDesc(new Long(constVal.toString()))); - }else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) { - children.set(constIdx, new ExprNodeConstantDesc(new Double(constVal.toString()))); - } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) { - children.set(constIdx, new ExprNodeConstantDesc(new Float(constVal.toString()))); - } else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) { - children.set(constIdx, new ExprNodeConstantDesc(new Byte(constVal.toString()))); - } else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal instanceof Number || constVal instanceof String)) { - children.set(constIdx, new ExprNodeConstantDesc(new Short(constVal.toString()))); - } else if (PrimitiveObjectInspectorUtils.decimalTypeEntry.equals(colTypeInfo.getPrimitiveTypeEntry()) && (constVal 
instanceof Number || constVal instanceof String)) { - children.set(constIdx, NumExprProcessor.createDecimal(constVal.toString(),false)); + + int constIdx = children.get(0) instanceof ExprNodeConstantDesc ? 0 : 1; + + ExprNodeDesc constChild = children.get(constIdx); + ExprNodeDesc columnChild = children.get(1 - constIdx); + + final PrimitiveTypeInfo colTypeInfo = + TypeInfoFactory.getPrimitiveTypeInfo(columnChild.getTypeString().toLowerCase()); + ExprNodeDesc newChild = interpretNodeAs(colTypeInfo, constChild); + if (newChild == null) { + // not interpretable as that type... + if (genericUDF instanceof GenericUDFOPEqual) { + return new ExprNodeConstantDesc(false); + } + } else { + children.set(constIdx, newChild); } - } catch (NumberFormatException nfe) { - LOG.trace("Failed to narrow type of constant", nfe); - if ((genericUDF instanceof GenericUDFOPEqual && !NumberUtils.isNumber(constVal.toString()))) { - return new ExprNodeConstantDesc(false); + } + if (genericUDF instanceof GenericUDFIn && children.get(0) instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc columnDesc = (ExprNodeColumnDesc) children.get(0); + final PrimitiveTypeInfo colTypeInfo = + TypeInfoFactory.getPrimitiveTypeInfo(columnDesc.getTypeString().toLowerCase()); + List outputOpList = children.subList(1, children.size()); + ArrayList inOperands = new ArrayList<>(outputOpList); + outputOpList.clear(); + + for (ExprNodeDesc oldChild : inOperands) { + if(oldChild !=null && oldChild instanceof ExprNodeConstantDesc) { + ExprNodeDesc newChild = interpretNodeAs(colTypeInfo, oldChild); + if(newChild == null) { + // non interpretable as target type; skip + continue; + } + outputOpList.add(newChild); + }else{ + outputOpList.add(oldChild); } } - - // if column type is char and constant type is string, then convert the constant to char - // type with padded spaces.
- if (constType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) && - colTypeInfo instanceof CharTypeInfo) { - final Object originalValue = ((ExprNodeConstantDesc) children.get(constIdx)).getValue(); - final String constValue = originalValue.toString(); - final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo); - final HiveChar newValue = new HiveChar(constValue, length); - children.set(constIdx, new ExprNodeConstantDesc(colTypeInfo, newValue)); - } } if (genericUDF instanceof GenericUDFOPOr) { // flatten OR @@ -1238,6 +1239,50 @@ protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, return desc; } + private ExprNodeDesc interpretNodeAs(PrimitiveTypeInfo colTypeInfo, ExprNodeDesc constChild) { + if (constChild instanceof ExprNodeConstantDesc) { + // Try to narrow type of constant + Object constVal = ((ExprNodeConstantDesc) constChild).getValue(); + String constType = constChild.getTypeString().toLowerCase(); + if (constVal instanceof Number || constVal instanceof String) { + try { + PrimitiveTypeEntry primitiveTypeEntry = colTypeInfo.getPrimitiveTypeEntry(); + if (PrimitiveObjectInspectorUtils.intTypeEntry.equals(primitiveTypeEntry)) { + return new ExprNodeConstantDesc(new Integer(constVal.toString())); + } else if (PrimitiveObjectInspectorUtils.longTypeEntry.equals(primitiveTypeEntry)) { + return new ExprNodeConstantDesc(new Long(constVal.toString())); + } else if (PrimitiveObjectInspectorUtils.doubleTypeEntry.equals(primitiveTypeEntry)) { + return new ExprNodeConstantDesc(new Double(constVal.toString())); + } else if (PrimitiveObjectInspectorUtils.floatTypeEntry.equals(primitiveTypeEntry)) { + return new ExprNodeConstantDesc(new Float(constVal.toString())); + } else if (PrimitiveObjectInspectorUtils.byteTypeEntry.equals(primitiveTypeEntry)) { + return new ExprNodeConstantDesc(new Byte(constVal.toString())); + } else if (PrimitiveObjectInspectorUtils.shortTypeEntry.equals(primitiveTypeEntry)) { + return new ExprNodeConstantDesc(new 
Short(constVal.toString())); + } else if (PrimitiveObjectInspectorUtils.decimalTypeEntry.equals(primitiveTypeEntry)) { + return NumExprProcessor.createDecimal(constVal.toString(), false); + } + } catch (NumberFormatException nfe) { + LOG.trace("Failed to narrow type of constant", nfe); + if (!NumberUtils.isNumber(constVal.toString())) { + return null; + } + } + } + + // if column type is char and constant type is string, then convert the constant to char + // type with padded spaces. + if (constType.equalsIgnoreCase(serdeConstants.STRING_TYPE_NAME) && colTypeInfo instanceof CharTypeInfo) { + final Object originalValue = ((ExprNodeConstantDesc) constChild).getValue(); + final String constValue = originalValue.toString(); + final int length = TypeInfoUtils.getCharacterLengthForType(colTypeInfo); + final HiveChar newValue = new HiveChar(constValue, length); + return new ExprNodeConstantDesc(colTypeInfo, newValue); + } + } + return constChild; + } + private boolean canConvertIntoNvl(GenericUDF genericUDF, ArrayList children) { if (genericUDF instanceof GenericUDFWhen && children.size() == 3 && children.get(1) instanceof ExprNodeConstantDesc && @@ -1351,9 +1396,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, * return null; */ if (windowingTokens.contains(expr.getType())) { - if (!ctx.getallowWindowing()) + if (!ctx.getallowWindowing()) { throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, ErrorMsg.INVALID_FUNCTION.getMsg("Windowing is not supported in the context"))); + } return null; } @@ -1367,10 +1413,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } if (expr.getType() == HiveParser.TOK_ALLCOLREF) { - if (!ctx.getallowAllColRef()) + if (!ctx.getallowAllColRef()) { throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, ErrorMsg.INVALID_COLUMN .getMsg("All column reference is not supported in the context"))); + } RowResolver input = ctx.getInputRR(); ExprNodeColumnListDesc 
columnList = new ExprNodeColumnListDesc(); @@ -1439,10 +1486,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) { - if (!ctx.getallowFunctionStar()) - throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, - ErrorMsg.INVALID_COLUMN - .getMsg(".* reference is not supported in the context"))); + if (!ctx.getallowFunctionStar()) { + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, + ErrorMsg.INVALID_COLUMN + .getMsg(".* reference is not supported in the context"))); + } RowResolver input = ctx.getInputRR(); for (ColumnInfo colInfo : input.getColumnInfos()) { @@ -1509,10 +1557,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ASTNode expr = (ASTNode) nd; ASTNode sqNode = (ASTNode) expr.getParent().getChild(1); - if (!ctx.getallowSubQueryExpr()) + if (!ctx.getallowSubQueryExpr()) { throw new CalciteSubquerySemanticException(SemanticAnalyzer.generateErrorMessage(sqNode, ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg("Currently SubQuery expressions are only allowed as " + "Where and Having Clause predicates"))); + } ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); if (desc != null) { diff --git ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java index b57b5ddc2c..b705fd7f88 100644 --- ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java +++ ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java @@ -32,11 +32,12 @@ import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature; import org.apache.hadoop.hive.ql.parse.ParseException; import 
org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource; import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper; -import org.apache.hadoop.hive.ql.plan.mapper.StatsSources; import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup; +import org.apache.hadoop.hive.ql.plan.mapper.StatsSources; import org.apache.hadoop.hive.ql.reexec.ReExecDriver; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.OperatorStats; @@ -150,6 +151,52 @@ public void testUsageOfRuntimeInfo() throws ParseException { } @Test + public void testInConversion() throws ParseException { + String query = + "explain select sum(id_uv) from tu where u in (1,2) group by u"; + + HiveConf conf = env_setup.getTestCtx().hiveConf; + conf.setIntVar(ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN, 10); + IDriver driver = createDriver(); + + PlanMapper pm = getMapperForQuery(driver, query); + List fos = pm.getAll(FilterOperator.class); + OpTreeSignature filterSig = pm.lookup(OpTreeSignature.class, fos.get(0)); + Object pred = filterSig.getSig().getSigMap().get("getPredicateString"); + + assertEquals("((u = 1) or (u = 2)) (type: boolean)", pred); + + } + + @Test + public void testBreakupAnd() throws ParseException { + String query = + "explain select sum(id_uv) from tu where u=1 and (u=2 or u=1) group by u"; + + IDriver driver = createDriver(); + PlanMapper pm = getMapperForQuery(driver, query); + List fos = pm.getAll(FilterOperator.class); + OpTreeSignature filterSig = pm.lookup(OpTreeSignature.class, fos.get(0)); + Object pred = filterSig.getSig().getSigMap().get("getPredicateString"); + assertEquals("(u = 1) (type: boolean)", pred); + } + + @Test + public void testBreakupAnd2() throws ParseException { + String query = + "explain select sum(id_uv) from tu where u in (1,2,3) and u=2 and u=2 and 2=u group by u"; + + IDriver driver = createDriver(); + PlanMapper pm = getMapperForQuery(driver, query); + List fos = pm.getAll(FilterOperator.class); + OpTreeSignature filterSig 
= pm.lookup(OpTreeSignature.class, fos.get(0)); + Object pred = filterSig.getSig().getSigMap().get("getPredicateString"); + assertEquals("(u = 2) (type: boolean)", pred); + + } + + + @Test @Ignore("needs HiveFilter mapping") public void testMappingJoinLookup() throws ParseException { IDriver driver = createDriver(); diff --git ql/src/test/queries/clientpositive/pointlookup.q ql/src/test/queries/clientpositive/pointlookup.q index 1b65cec71c..a808a51cbf 100644 --- ql/src/test/queries/clientpositive/pointlookup.q +++ ql/src/test/queries/clientpositive/pointlookup.q @@ -1,4 +1,7 @@ --! qt:dataset:src + +set hive.optimize.point.lookup.min=31; + explain SELECT key FROM src @@ -123,4 +126,4 @@ or inOutputOpt.key = null; drop table orOutput; drop table inOutput; -drop table inOutputOpt; \ No newline at end of file +drop table inOutputOpt; diff --git ql/src/test/queries/clientpositive/pointlookup2.q ql/src/test/queries/clientpositive/pointlookup2.q index fe19381368..4f2f8970d0 100644 --- ql/src/test/queries/clientpositive/pointlookup2.q +++ ql/src/test/queries/clientpositive/pointlookup2.q @@ -1,5 +1,7 @@ --! qt:dataset:src +set hive.optimize.point.lookup.min=31; set hive.mapred.mode=nonstrict; + drop table pcr_t1_n2; drop table pcr_t2_n0; drop table pcr_t3; @@ -130,4 +132,4 @@ order by t1.key, t1.value, t2.ds; drop table pcr_t1_n2; drop table pcr_t2_n0; -drop table pcr_t3; \ No newline at end of file +drop table pcr_t3; diff --git ql/src/test/queries/clientpositive/pointlookup3.q ql/src/test/queries/clientpositive/pointlookup3.q index f98feeb164..1e061c332f 100644 --- ql/src/test/queries/clientpositive/pointlookup3.q +++ ql/src/test/queries/clientpositive/pointlookup3.q @@ -1,4 +1,5 @@ --! 
qt:dataset:src +set hive.optimize.point.lookup.min=31; set hive.mapred.mode=nonstrict; drop table pcr_t1_n1; diff --git ql/src/test/results/clientpositive/alter_partition_coltype.q.out ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 5d033a3c01..5727f0a65c 100644 --- ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -539,9 +539,6 @@ PREHOOK: query: explain extended select intcol from pt.alterdynamic_part_table w PREHOOK: type: QUERY POSTHOOK: query: explain extended select intcol from pt.alterdynamic_part_table where (partcol1='2' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__') POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `intcol` -FROM `pt`.`alterdynamic_part_table` -WHERE `partcol1` = 2 AND `partcol2` = '1' OR `partcol1` = 1 AND `partcol2` = '__HIVE_DEFAULT_PARTITION__' STAGE DEPENDENCIES: Stage-0 is a root stage @@ -600,7 +597,7 @@ STAGE PLANS: Processor Tree: TableScan alias: alterdynamic_part_table - filterExpr: (((partcol1 = 2) and (partcol2 = '1')) or ((partcol1 = 1) and (partcol2 = '__HIVE_DEFAULT_PARTITION__'))) (type: boolean) + filterExpr: (struct(partcol1,partcol2)) IN (const struct(2,'1'), const struct(1,'__HIVE_DEFAULT_PARTITION__')) (type: boolean) Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator diff --git ql/src/test/results/clientpositive/annotate_stats_filter.q.out ql/src/test/results/clientpositive/annotate_stats_filter.q.out index 54395886d2..dd42cf044c 100644 --- ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -649,18 +649,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - filterExpr: ((state = 'OH') or (state = 'CA')) (type: boolean) + filterExpr: (state) IN ('OH', 'CA') (type: boolean) Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE 
Column stats: COMPLETE Filter Operator - predicate: ((state = 'CA') or (state = 'OH')) (type: boolean) - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (state) IN ('OH', 'CA') (type: boolean) + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/annotate_stats_part.q.out ql/src/test/results/clientpositive/annotate_stats_part.q.out index bafc6de51e..1c67a65b97 100644 --- ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -241,7 +241,7 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_n4 - filterExpr: ((year = '2001') or (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean) + filterExpr: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean) Statistics: Num rows: 8 Data size: 3814 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) diff --git ql/src/test/results/clientpositive/auto_join19.q.out ql/src/test/results/clientpositive/auto_join19.q.out index 3e07ec06de..e04c3bfe6f 100644 --- ql/src/test/results/clientpositive/auto_join19.q.out +++ 
ql/src/test/results/clientpositive/auto_join19.q.out @@ -53,7 +53,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (((ds = '2008-04-08') or (ds = '2008-04-09')) and ((hr = '12') or (hr = '11')) and key is not null) (type: boolean) + filterExpr: ((ds) IN ('2008-04-08', '2008-04-09') and (hr) IN ('12', '11') and key is not null) (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) diff --git ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out index 2e7d79660b..43cb5ab89f 100644 --- ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out +++ ql/src/test/results/clientpositive/cbo_rp_simple_select.q.out @@ -915,22 +915,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cbo_t2 - filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + filterExpr: ((c_int = c_int) or (c_int = (2 * c_int))) (type: boolean) Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((c_int = (2 * c_int)) or (c_int = c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 
7138 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -954,22 +955,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cbo_t2 - filterExpr: (c_int) IN (c_int, 0) (type: boolean) + filterExpr: ((c_int = c_int) or (c_int = 0)) (type: boolean) Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((c_int = 0) or (c_int = c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: key, value, c_int, c_float, c_boolean, dt - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/cbo_simple_select.q.out ql/src/test/results/clientpositive/cbo_simple_select.q.out index 33f0e71080..2073c6b802 100644 --- ql/src/test/results/clientpositive/cbo_simple_select.q.out +++ 
ql/src/test/results/clientpositive/cbo_simple_select.q.out @@ -915,22 +915,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cbo_t2 - filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + filterExpr: ((c_int = c_int) or (c_int = (2 * c_int))) (type: boolean) Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((c_int = (2 * c_int)) or (c_int = c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -954,22 +955,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: cbo_t2 - filterExpr: (c_int) IN (c_int, 0) (type: boolean) + filterExpr: ((c_int = c_int) or (c_int = 0)) (type: boolean) Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (c_int) IN (c_int, 0) (type: boolean) - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + predicate: 
((c_int = 0) or (c_int = c_int)) (type: boolean) + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 3660 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 7138 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/druid_intervals.q.out ql/src/test/results/clientpositive/druid_intervals.q.out index a5203c3182..715623ad61 100644 --- ql/src/test/results/clientpositive/druid_intervals.q.out +++ ql/src/test/results/clientpositive/druid_intervals.q.out @@ -375,7 +375,7 @@ STAGE PLANS: properties: druid.fieldNames vc,robot druid.fieldTypes timestamp with local time zone,string - druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"in","dimension":"__time","values":["2010-01-01T08:00:00.000Z","2011-01-01T08:00:00.000Z"],"extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"selector","dimension":"robot","value":"user1"}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","robot"],"resultFormat":"compactedList"} + 
druid.query.json {"queryType":"scan","dataSource":"wikipedia","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"filter":{"type":"or","fields":[{"type":"selector","dimension":"robot","value":"user1"},{"type":"selector","dimension":"__time","value":"2010-01-01T08:00:00.000Z","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}},{"type":"selector","dimension":"__time","value":"2011-01-01T08:00:00.000Z","extractionFn":{"type":"timeFormat","format":"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'","timeZone":"UTC"}}]},"virtualColumns":[{"type":"expression","name":"vc","expression":"\"__time\"","outputType":"LONG"}],"columns":["vc","robot"],"resultFormat":"compactedList"} druid.query.type scan Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index 97922c2636..88b24a7501 100644 --- ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -194,9 +194,6 @@ PREHOOK: query: explain extended select intcol from dynamic_part_table where (pa PREHOOK: type: QUERY POSTHOOK: query: explain extended select intcol from dynamic_part_table where (partcol1='1' and partcol2='1')or (partcol1='1' and partcol2='__HIVE_DEFAULT_PARTITION__') POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `intcol` -FROM `default`.`dynamic_part_table` -WHERE `partcol1` = '1' AND (`partcol2` = '1' OR `partcol2` = '__HIVE_DEFAULT_PARTITION__') STAGE DEPENDENCIES: Stage-0 is a root stage @@ -302,7 +299,7 @@ STAGE PLANS: Processor Tree: TableScan alias: dynamic_part_table - filterExpr: ((partcol1 = '1') and ((partcol2 = '1') or (partcol2 = '__HIVE_DEFAULT_PARTITION__'))) (type: boolean) + filterExpr: ((partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__') and (partcol1 = '1')) (type: boolean) 
Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator diff --git ql/src/test/results/clientpositive/filter_cond_pushdown.q.out ql/src/test/results/clientpositive/filter_cond_pushdown.q.out index b84a2d4b79..12e157f783 100644 --- ql/src/test/results/clientpositive/filter_cond_pushdown.q.out +++ ql/src/test/results/clientpositive/filter_cond_pushdown.q.out @@ -36,20 +36,20 @@ STAGE PLANS: value expressions: _col1 (type: string) TableScan alias: f - filterExpr: (((value = '2008-04-08') or (value = '2008-04-09')) and key is not null) (type: boolean) + filterExpr: ((value) IN ('2008-04-08', '2008-04-09') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((value = '2008-04-08') or (value = '2008-04-09')) and key is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: ((value) IN ('2008-04-08', '2008-04-09') and key is not null) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -143,11 +143,11 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - alias: m - filterExpr: ((value <> '') and key is not null) (type: boolean) + alias: f + filterExpr: key is not 
null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value <> '') and key is not null) (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -160,11 +160,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) TableScan - alias: f - filterExpr: (((value) IN ('2008-04-08', '2008-04-10') or (value = '2008-04-09')) and key is not null) (type: boolean) + alias: m + filterExpr: ((value <> '') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((value) IN ('2008-04-08', '2008-04-10') or (value = '2008-04-09')) and key is not null) (type: boolean) + predicate: ((value <> '') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -183,11 +183,11 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col3) IN ('2008-04-08', '2008-04-10') and (_col1 = '2008-04-08')) or (_col3 = '2008-04-09')) (type: boolean) - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE + predicate: ((((_col1 = '2008-04-08') or (_col1 = '2008-04-10')) and (_col3 = '2008-04-08')) or (_col1 = '2008-04-09')) (type: boolean) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -200,11 +200,11 @@ STAGE PLANS: Map Operator Tree: 
TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col3 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 412 Data size: 4376 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string) + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: g filterExpr: (value <> '') (type: boolean) @@ -226,17 +226,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col3 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2, _col4 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col4 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col4 (type: string) + expressions: _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -419,10 +419,10 @@ STAGE PLANS: value expressions: _col1 (type: string) TableScan alias: m - filterExpr: ((value <> '') and ((value = '2008-04-10') or (value = '2008-04-08')) and key is not null) (type: boolean) + filterExpr: ((value <> '') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator 
- predicate: (((value = '2008-04-10') or (value = '2008-04-08')) and (value <> '') and key is not null) (type: boolean) + predicate: ((value <> '') and key is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -444,15 +444,15 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col1) IN ('2008-04-08', '2008-04-10') and (_col1) IN ('2008-04-08', '2008-04-09') and (_col3 = '2008-04-10')) or (_col3 = '2008-04-08')) (type: boolean) - Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE + predicate: ((((_col1 = '2008-04-08') or (_col1 = '2008-04-10')) and ((_col1 = '2008-04-08') or (_col1 = '2008-04-09')) and (_col3 = '2008-04-10')) or (_col3 = '2008-04-08')) (type: boolean) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 344 Data size: 3654 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out index d06fb60345..add0694581 100644 --- ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out +++ 
ql/src/test/results/clientpositive/fold_eq_with_case_when.q.out @@ -139,18 +139,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src - filterExpr: ((key = '238') or (key = '94')) (type: boolean) + filterExpr: (key) IN ('238', '94') (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = '238') or (key = '94')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN ('238', '94') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 98ad3656e7..5edc2029a0 100644 --- ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -379,9 +379,6 @@ PREHOOK: query: explain extended SELECT key, value FROM fact_daily_n2 WHERE ds=' PREHOOK: type: QUERY POSTHOOK: query: explain extended SELECT key, value FROM fact_daily_n2 WHERE ds='1' and hr='4' and ( (key='484' and value ='val_484') or (key='238' and value= 'val_238')) POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, `value` -FROM `default`.`fact_daily_n2` -WHERE `ds` = '1' AND `hr` = '4' AND (`key` 
= '484' AND `value` = 'val_484' OR `key` = '238' AND `value` = 'val_238') STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -392,23 +389,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: fact_daily_n2 - filterExpr: ((ds = '1') and (hr = '4') and (((key = '484') and (value = 'val_484')) or ((key = '238') and (value = 'val_238')))) (type: boolean) + filterExpr: ((struct(key,value)) IN (const struct('484','val_484'), const struct('238','val_238')) and (ds = '1') and (hr = '4')) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((key = '238') and (value = 'val_238')) or ((key = '484') and (value = 'val_484'))) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,value)) IN (const struct('484','val_484'), const struct('238','val_238')) (type: boolean) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -431,6 +428,55 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition + base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + hr 4 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.fact_daily_n2 + numFiles 3 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct fact_daily_n2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.fact_daily_n2 + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct fact_daily_n2 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.fact_daily_n2 + name: default.fact_daily_n2 +#### A masked pattern was here #### + Partition base file name: value=val_238 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -528,6 +574,7 @@ STAGE PLANS: name: default.fact_daily_n2 name: default.fact_daily_n2 Truncated Path -> Alias: + /fact_daily_n2/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily_n2] /fact_daily_n2/ds=1/hr=4/key=238/value=val_238 
[fact_daily_n2] /fact_daily_n2/ds=1/hr=4/key=484/value=val_484 [fact_daily_n2] diff --git ql/src/test/results/clientpositive/llap/bucketpruning1.q.out ql/src/test/results/clientpositive/llap/bucketpruning1.q.out index cc637db05b..260ba1cbdd 100644 --- ql/src/test/results/clientpositive/llap/bucketpruning1.q.out +++ ql/src/test/results/clientpositive/llap/bucketpruning1.q.out @@ -788,9 +788,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from srcbucket_pruned where (key=1 or key=2) and ds='2008-04-08' POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` -FROM `default`.`srcbucket_pruned` -WHERE (`key` = 1 OR `key` = 2) AND `ds` = '2008-04-08' STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -804,12 +801,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_pruned - filterExpr: (((key = 1) or (key = 2)) and (ds = '2008-04-08')) (type: boolean) + filterExpr: ((key) IN (1, 2) and (ds = '2008-04-08')) (type: boolean) + buckets included: [4,13,] of 16 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Filter Operator isSamplingPred: false - predicate: (((key = 1) or (key = 2)) and (ds = '2008-04-08')) (type: boolean) + predicate: ((ds = '2008-04-08') and (key) IN (1, 2)) (type: boolean) Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), value (type: string), '2008-04-08' (type: string) @@ -852,9 +850,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from srcbucket_pruned where (key=1 or key=2) and value = 'One' and ds='2008-04-08' POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, CAST('One' AS STRING) AS `value`, CAST('2008-04-08' AS STRING) AS `ds` -FROM `default`.`srcbucket_pruned` -WHERE (`key` = 1 OR `key` = 2) AND `value` = 'One' AND `ds` = '2008-04-08' STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 
depends on stages: Stage-1 @@ -868,12 +863,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_pruned - filterExpr: (((key = 1) or (key = 2)) and (value = 'One') and (ds = '2008-04-08')) (type: boolean) + filterExpr: ((key) IN (1, 2) and (value = 'One') and (ds = '2008-04-08')) (type: boolean) + buckets included: [4,13,] of 16 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Filter Operator isSamplingPred: false - predicate: (((key = 1) or (key = 2)) and (ds = '2008-04-08') and (value = 'One')) (type: boolean) + predicate: ((ds = '2008-04-08') and (key) IN (1, 2) and (value = 'One')) (type: boolean) Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), 'One' (type: string), '2008-04-08' (type: string) @@ -1293,9 +1289,6 @@ PREHOOK: type: QUERY POSTHOOK: query: explain extended select * from srcbucket_pruned where key = 1 and ds='2008-04-08' and (value='One' or value = 'Two') POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(1 AS INTEGER) AS `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds` -FROM `default`.`srcbucket_pruned` -WHERE `key` = 1 AND `ds` = '2008-04-08' AND (`value` = 'One' OR `value` = 'Two') STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1309,12 +1302,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcbucket_pruned - filterExpr: ((key = 1) and (ds = '2008-04-08') and ((value = 'One') or (value = 'Two'))) (type: boolean) + filterExpr: ((value) IN ('One', 'Two') and (key = 1) and (ds = '2008-04-08')) (type: boolean) + buckets included: [13,] of 16 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL GatherStats: false Filter Operator isSamplingPred: false - predicate: (((value = 'One') or (value = 'Two')) and (ds = '2008-04-08') and (key = 1)) (type: boolean) + predicate: ((ds = '2008-04-08') and (key = 1) and (value) IN ('One', 'Two')) 
(type: boolean) Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: 1 (type: int), value (type: string), '2008-04-08' (type: string) diff --git ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out index 1330a86426..45260654f8 100644 --- ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out +++ ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out @@ -85,27 +85,27 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean) + filterExpr: (key) IN (0, 5) (type: boolean) Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 358 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 537 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean) + filterExpr: (key) IN (0, 5) (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: 
int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -113,16 +113,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap Reducer 2 @@ -131,10 +131,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -143,7 +143,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), 
'1' (type: string) outputColumnNames: key, value, ds - Statistics: Num rows: 2 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string) @@ -299,27 +299,27 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2_n19 - filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean) + filterExpr: (key) IN (0, 5) (type: boolean) Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: test_table1_n20 - filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean) + filterExpr: (key) IN (0, 5) (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data 
size: 279 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -327,16 +327,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 549 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap Reducer 2 @@ -345,10 +345,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -357,7 +357,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) outputColumnNames: key, value, ds - Statistics: Num rows: 2 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 
819 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string) @@ -519,27 +519,27 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2_n19 - filterExpr: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) + filterExpr: ((key) IN (0, 5) and (key < 8)) (type: boolean) Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean) - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: test_table1_n20 - filterExpr: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) + filterExpr: ((key) IN (0, 5) and (key < 8)) (type: boolean) Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -547,16 +547,28 @@ STAGE PLANS: 
0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 +<<<<<<< HEAD + Statistics: Num rows: 1 Data size: 183 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 2 Data size: 366 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE +>>>>>>> asf/master Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) +<<<<<<< HEAD + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE +>>>>>>> asf/master value expressions: _col1 (type: string) Execution mode: llap Reducer 2 @@ -565,10 +577,17 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 +<<<<<<< HEAD + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Statistics: Num rows: 2 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE +>>>>>>> asf/master table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -577,7 +596,11 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) 
outputColumnNames: key, value, ds +<<<<<<< HEAD + Statistics: Num rows: 1 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 2 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE +>>>>>>> asf/master Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string) diff --git ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out index a35edb42a8..e61300b5c8 100644 --- ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out +++ ql/src/test/results/clientpositive/llap/cbo_simple_select.q.out @@ -851,9 +851,9 @@ STAGE PLANS: Processor Tree: TableScan alias: cbo_t2 - filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + filterExpr: ((c_int = c_int) or (c_int = (2 * c_int))) (type: boolean) Filter Operator - predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + predicate: ((c_int = (2 * c_int)) or (c_int = c_int)) (type: boolean) Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -875,9 +875,9 @@ STAGE PLANS: Processor Tree: TableScan alias: cbo_t2 - filterExpr: (c_int) IN (c_int, 0) (type: boolean) + filterExpr: ((c_int = c_int) or (c_int = 0)) (type: boolean) Filter Operator - predicate: (c_int) IN (c_int, 0) (type: boolean) + predicate: ((c_int = 0) or (c_int = c_int)) (type: boolean) Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 diff --git ql/src/test/results/clientpositive/llap/check_constraint.q.out ql/src/test/results/clientpositive/llap/check_constraint.q.out index 123a3e46fc..ec1ed64fe8 100644 --- 
ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -1931,10 +1931,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n0 - filterExpr: (((de = 103) or (de = 119)) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean) + filterExpr: ((de) IN (103, 119) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean) Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((de = 103) or (de = 119)) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean) + predicate: ((de) IN (103, 119) and enforce_constraint((893.14 >= CAST( i AS decimal(5,2))) is not false)) (type: boolean) Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) diff --git ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 8f06ee58ce..78eded3371 100644 --- ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -5927,10 +5927,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n0 - filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 2944 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: 
((UDFToDouble(hour) = 11.0D) and (date) IN ('2008-04-08', '2008-04-09') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 1472 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) diff --git ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index e03cd3437e..1cfa61336d 100644 --- ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3233,10 +3233,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_uami_n1 - filterExpr: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean) + filterExpr: ((de) IN (109.23, 119.23) and enforce_constraint(vc is not null)) (type: boolean) Statistics: Num rows: 1002 Data size: 225450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (((de = 109.23) or (de = 119.23)) and enforce_constraint(vc is not null)) (type: boolean) + predicate: ((de) IN (109.23, 119.23) and enforce_constraint(vc is not null)) (type: boolean) Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ROW__ID (type: struct), i (type: int), vc (type: varchar(128)) diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 4db83c149d..a98191653f 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -1648,7 +1648,7 @@ Stage-0 Select Operator [SEL_2] Output:["_col0"] Filter Operator [FIL_4] - predicate:((c_int = -6) or (c_int = 6)) + predicate:(c_int) IN (-6, 6) TableScan [TS_0] Output:["key","c_int"] diff --git ql/src/test/results/clientpositive/llap/kryo.q.out 
ql/src/test/results/clientpositive/llap/kryo.q.out index 234bae89c7..764a9147d3 100644 --- ql/src/test/results/clientpositive/llap/kryo.q.out +++ ql/src/test/results/clientpositive/llap/kryo.q.out @@ -44,10 +44,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: datetime_ - filterExpr: ((id = 1) or (id = 2) or (id = 3) or (id = 4) or (id = 5) or (id = 6)) (type: boolean) + filterExpr: (id) IN (1, 2, 3, 4, 5, 6) (type: boolean) Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((id = 1) or (id = 2) or (id = 3) or (id = 4) or (id = 5) or (id = 6)) (type: boolean) + predicate: (id) IN (1, 2, 3, 4, 5, 6) (type: boolean) Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: if(((id = 1) or (id = 2)), if((id = 1), date_, date_), if((id = 3), CASE WHEN ((date_ is null or to_date(datetime) is null)) THEN (null) WHEN ((CAST( date_ AS DATE) > to_date(datetime))) THEN (date_) ELSE (to_date(datetime)) END, null)) (type: string), id (type: int), CASE WHEN ((id = 6)) THEN (CASE WHEN ((concat(date_, ' 00:00:00') is null or datetime is null)) THEN (null) WHEN ((concat(date_, ' 00:00:00') > datetime)) THEN (concat(date_, ' 00:00:00')) ELSE (datetime) END) WHEN ((id = 5)) THEN (CASE WHEN ((date_ is null or datetime is null)) THEN (null) WHEN ((date_ > datetime)) THEN (date_) ELSE (datetime) END) WHEN ((id = 3)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 4)) THEN (concat(date_, ' 00:00:00')) WHEN ((id = 1)) THEN (date_) WHEN ((id = 2)) THEN (date_) ELSE (null) END (type: string) diff --git ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out index 88ddd9c076..5b1584e96b 100644 --- ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out +++ ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out @@ -126,10 +126,10 @@ STAGE PLANS: Map Operator Tree: TableScan 
alias: orc_llap_n0 - filterExpr: ((cdecimal1 = 3.35) or (cdecimal1 = 4.46)) (type: boolean) + filterExpr: (cdecimal1) IN (3.35, 4.46) (type: boolean) Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((cdecimal1 = 3.35) or (cdecimal1 = 4.46)) (type: boolean) + predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean) Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ @@ -227,10 +227,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_n0 - filterExpr: ((cdecimal1 = 3.35) or (cdecimal1 = 4.46)) (type: boolean) + filterExpr: (cdecimal1) IN (3.35, 4.46) (type: boolean) Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((cdecimal1 = 3.35) or (cdecimal1 = 4.46)) (type: boolean) + predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean) Statistics: Num rows: 24576 Data size: 5505024 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ diff --git ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out index 1841f1f4d3..f0cbf42d22 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb.q.out @@ -1504,10 +1504,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv_n0 - filterExpr: ((d_year >= 1992) and (d_year <= 1997) and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean) + filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (d_year >= 1992) and (d_year <= 1997)) (type: boolean) Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: 
(((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5')) and (d_year <= 1997) and (d_year >= 1992)) (type: boolean) + predicate: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (d_year <= 1997) and (d_year >= 1992) and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean) Statistics: Num rows: 1 Data size: 380 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(lo_revenue) @@ -1613,10 +1613,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv_n0 - filterExpr: ((d_yearmonth = 'Dec1997') and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean) + filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (d_yearmonth = 'Dec1997')) (type: boolean) Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5')) and (d_yearmonth = 'Dec1997')) (type: boolean) + predicate: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (d_yearmonth = 'Dec1997') and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean) Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_city (type: string), d_year (type: int), s_city (type: string), lo_revenue (type: double) @@ -1730,10 +1730,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv_n0 - filterExpr: ((c_region = 'AMERICA') and (s_region = 'AMERICA') and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean) + filterExpr: ((p_mfgr) IN ('MFGR#1', 'MFGR#2') and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean) Statistics: Num rows: 1 Data size: 748 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2')) and (c_region = 
'AMERICA') and (s_region = 'AMERICA')) (type: boolean) + predicate: ((c_region = 'AMERICA') and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (s_region = 'AMERICA')) (type: boolean) Statistics: Num rows: 1 Data size: 748 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_nation (type: string), d_year (type: int), net_revenue (type: double) @@ -1845,10 +1845,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv_n0 - filterExpr: ((c_region = 'AMERICA') and (s_region = 'AMERICA') and ((d_year = 1997) or (d_year = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean) + filterExpr: ((d_year) IN (1997, 1998) and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean) Statistics: Num rows: 1 Data size: 932 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((d_year = 1997) or (d_year = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2')) and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean) + predicate: ((c_region = 'AMERICA') and (d_year) IN (1997, 1998) and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (s_region = 'AMERICA')) (type: boolean) Statistics: Num rows: 1 Data size: 932 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_year (type: int), p_category (type: string), s_nation (type: string), net_revenue (type: double) @@ -1960,10 +1960,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv_n0 - filterExpr: ((c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES') and ((d_year = 1997) or (d_year = 1998))) (type: boolean) + filterExpr: ((d_year) IN (1997, 1998) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean) Statistics: Num rows: 1 Data size: 932 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((d_year = 1997) or (d_year = 1998)) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation 
= 'UNITED STATES')) (type: boolean) + predicate: ((c_region = 'AMERICA') and (d_year) IN (1997, 1998) and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean) Statistics: Num rows: 1 Data size: 932 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_year (type: int), p_brand1 (type: string), s_city (type: string), net_revenue (type: double) diff --git ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out index d7c92d8c59..c265c4161b 100644 --- ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out +++ ql/src/test/results/clientpositive/llap/materialized_view_rewrite_ssb_2.q.out @@ -1506,10 +1506,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv - filterExpr: ((UDFToInteger(d_year) >= 1992) and (UDFToInteger(d_year) <= 1997) and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean) + filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (UDFToInteger(d_year) >= 1992) and (UDFToInteger(d_year) <= 1997)) (type: boolean) Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5')) and (UDFToInteger(d_year) <= 1997) and (UDFToInteger(d_year) >= 1992)) (type: boolean) + predicate: ((UDFToInteger(d_year) <= 1997) and (UDFToInteger(d_year) >= 1992) and (c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean) Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_city (type: string), s_city (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double) @@ -1619,10 +1619,10 @@ STAGE PLANS: Map Operator 
Tree: TableScan alias: default.ssb_mv - filterExpr: ((d_yearmonth = 'Dec1997') and ((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5'))) (type: boolean) + filterExpr: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (s_city) IN ('UNITED KI1', 'UNITED KI5') and (d_yearmonth = 'Dec1997')) (type: boolean) Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((c_city = 'UNITED KI1') or (c_city = 'UNITED KI5')) and ((s_city = 'UNITED KI1') or (s_city = 'UNITED KI5')) and (d_yearmonth = 'Dec1997')) (type: boolean) + predicate: ((c_city) IN ('UNITED KI1', 'UNITED KI5') and (d_yearmonth = 'Dec1997') and (s_city) IN ('UNITED KI1', 'UNITED KI5')) (type: boolean) Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_city (type: string), s_city (type: string), UDFToInteger(d_year) (type: int), lo_revenue (type: double) @@ -1736,10 +1736,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv - filterExpr: ((c_region = 'AMERICA') and (s_region = 'AMERICA') and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean) + filterExpr: ((p_mfgr) IN ('MFGR#1', 'MFGR#2') and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean) Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2')) and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean) + predicate: ((c_region = 'AMERICA') and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (s_region = 'AMERICA')) (type: boolean) Statistics: Num rows: 1 Data size: 928 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(d_year) (type: int), c_nation (type: string), net_revenue (type: double) @@ -1851,10 +1851,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv - filterExpr: ((c_region = 'AMERICA') and 
(s_region = 'AMERICA') and ((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2'))) (type: boolean) + filterExpr: ((UDFToInteger(d_year)) IN (1997, 1998) and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean) Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998)) and ((p_mfgr = 'MFGR#1') or (p_mfgr = 'MFGR#2')) and (c_region = 'AMERICA') and (s_region = 'AMERICA')) (type: boolean) + predicate: ((UDFToInteger(d_year)) IN (1997, 1998) and (c_region = 'AMERICA') and (p_mfgr) IN ('MFGR#1', 'MFGR#2') and (s_region = 'AMERICA')) (type: boolean) Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(d_year) (type: int), s_nation (type: string), p_category (type: string), net_revenue (type: double) @@ -1966,10 +1966,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default.ssb_mv - filterExpr: ((c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES') and ((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998))) (type: boolean) + filterExpr: ((UDFToInteger(d_year)) IN (1997, 1998) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean) Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToInteger(d_year) = 1997) or (UDFToInteger(d_year) = 1998)) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean) + predicate: ((UDFToInteger(d_year)) IN (1997, 1998) and (c_region = 'AMERICA') and (p_category = 'MFGR#14') and (s_nation = 'UNITED STATES')) (type: boolean) Statistics: Num rows: 1 Data size: 1112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 
UDFToInteger(d_year) (type: int), s_city (type: string), p_brand1 (type: string), net_revenue (type: double) diff --git ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out index 65eec521a2..54ccf58442 100644 --- ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out +++ ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -747,12 +747,12 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 50 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_7: 50 - RECORDS_OUT_OPERATOR_FS_9: 1 - RECORDS_OUT_OPERATOR_GBY_8: 1 + RECORDS_OUT_OPERATOR_FIL_8: 50 + RECORDS_OUT_OPERATOR_FS_12: 1 + RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 - RECORDS_OUT_OPERATOR_RS_3: 50 - RECORDS_OUT_OPERATOR_SEL_2: 50 + RECORDS_OUT_OPERATOR_RS_10: 50 + RECORDS_OUT_OPERATOR_SEL_9: 50 RECORDS_OUT_OPERATOR_TS_0: 1100 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1079 diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 801dda315a..3bfd1aaddc 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -144,7 +144,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - filterExpr: (not (cdate) IN (DATE'1969-10-26', DATE'1969-07-14', DATE'1970-01-21')) (type: boolean) + filterExpr: ((cdate <> DATE'1969-10-26') and (cdate <> DATE'1969-07-14') and (cdate <> DATE'1970-01-21')) (type: boolean) Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -152,15 +152,15 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 5:boolean) - predicate: (not (cdate) IN 
(DATE'1969-10-26', DATE'1969-07-14', DATE'1970-01-21')) (type: boolean) - Statistics: Num rows: 12284 Data size: 653589 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: FilterDateColNotEqualDateScalar(col 3:date, val -67), FilterDateColNotEqualDateScalar(col 3:date, val -171), FilterDateColNotEqualDateScalar(col 3:date, val 20)) + predicate: ((cdate <> DATE'1969-07-14') and (cdate <> DATE'1969-10-26') and (cdate <> DATE'1970-01-21')) (type: boolean) + Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE Select Operator Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [] - Statistics: Num rows: 12284 Data size: 653589 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -350,7 +350,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - filterExpr: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) + filterExpr: ((cdecimal1 <> 2365.8945945946) and (cdecimal1 <> 881.0135135135) and (cdecimal1 <> -3367.6517567568)) (type: boolean) Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -358,15 +358,15 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean) - predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) - Statistics: Num rows: 12274 Data size: 1306115 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 
2365.8945945946), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568)) + predicate: ((cdecimal1 <> -3367.6517567568) and (cdecimal1 <> 2365.8945945946) and (cdecimal1 <> 881.0135135135)) (type: boolean) + Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE Select Operator Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [] - Statistics: Num rows: 12274 Data size: 1306115 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -1108,13 +1108,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: (cdate) IN (DATE'1969-10-26', DATE'1969-07-14') (type: boolean) + expressions: ((cdate = DATE'1969-10-26') or (cdate = DATE'1969-07-14')) (type: boolean) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 5:boolean + projectedOutputColumnNums: [7] + selectExpressions: ColOrCol(col 5:boolean, col 6:boolean)(children: DateColEqualDateScalar(col 3:date, date 1969-10-26) -> 5:boolean, DateColEqualDateScalar(col 3:date, date 1969-07-14) -> 6:boolean) -> 7:boolean Statistics: Num rows: 12289 Data size: 653856 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1122,7 +1122,7 @@ STAGE PLANS: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:boolean + keyExpressions: col 7:boolean native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -1246,13 +1246,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: (cdecimal1) IN 
(2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + expressions: ((cdecimal1 = 2365.8945945946) or (cdecimal1 = 881.0135135135) or (cdecimal1 = -3367.6517567568)) (type: boolean) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean + projectedOutputColumnNums: [8] + selectExpressions: VectorUDFAdaptor(((cdecimal1 = 2365.8945945946) or (cdecimal1 = 881.0135135135) or (cdecimal1 = -3367.6517567568)))(children: DecimalColEqualDecimalScalar(col 1:decimal(20,10), val 2365.8945945946) -> 5:boolean, DecimalColEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135) -> 6:boolean, DecimalColEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568) -> 7:boolean) -> 8:boolean Statistics: Num rows: 12289 Data size: 1307712 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1260,7 +1260,7 @@ STAGE PLANS: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:boolean + keyExpressions: col 8:boolean native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -1287,7 +1287,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out index 54d9914caa..9697f55f75 100644 --- ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_decimal.q.out @@ -63,7 +63,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_decimal - filterExpr: 
(UDFToDouble(id)) IN (1.0E8D, 2.0E8D) (type: boolean) + filterExpr: (id) IN (100000000, 200000000) (type: boolean) Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -71,8 +71,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterDoubleColumnInList(col 3:double, values [1.0E8, 2.0E8])(children: CastDecimalToDouble(col 2:decimal(18,0))(children: ConvertDecimal64ToDecimal(col 0:decimal(18,0)/DECIMAL_64) -> 2:decimal(18,0)) -> 3:double) - predicate: (UDFToDouble(id)) IN (1.0E8D, 2.0E8D) (type: boolean) + predicateExpression: FilterDecimal64ColumnInList(col 0:decimal(18,0)/DECIMAL_64, values [, decimal64Val 100000000, decimalVal 100000000, decimal64Val 200000000, decimalVal 200000000]) + predicate: (id) IN (100000000, 200000000) (type: boolean) Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: decimal(18,0)) diff --git ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out index 725ed34acb..157d184a0f 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out @@ -10238,7 +10238,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_n6 - filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -10247,8 +10247,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), 
FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck)) - predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean) + predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck) + predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: s (type: string), dec (type: decimal(4,2)) @@ -10516,7 +10516,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_n6 - filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -10525,8 +10525,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck)) - predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean) + predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck) + predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: s (type: string) @@ -10783,7 +10783,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_n6 - filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -10792,8 +10792,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: 
FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck)) - predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean) + predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck) + predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: s (type: string), bo (type: boolean) @@ -11056,7 +11056,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_n6 - filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -11065,8 +11065,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck)) - predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean) + predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck) + predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: i (type: int) @@ -11336,7 +11336,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_n6 - filterExpr: ((s = 'tom allen') or (s = 'bob steinbeck')) (type: boolean) + filterExpr: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -11345,8 +11345,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: 
FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val tom allen), FilterStringGroupColEqualStringScalar(col 7:string, val bob steinbeck)) - predicate: ((s = 'bob steinbeck') or (s = 'tom allen')) (type: boolean) + predicateExpression: FilterStringColumnInList(col 7, values tom allen, bob steinbeck) + predicate: (s) IN ('tom allen', 'bob steinbeck') (type: boolean) Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: s (type: string), dec (type: decimal(4,2)) diff --git ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out index 74ac56d1c6..1871216134 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out @@ -1410,7 +1410,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over10k_n7 - filterExpr: (((s = 'oscar allen') or (s = 'oscar carson')) and (t = 10Y)) (type: boolean) + filterExpr: ((s) IN ('oscar allen', 'oscar carson') and (t = 10Y)) (type: boolean) Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -1419,8 +1419,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 7:string, val oscar allen), FilterStringGroupColEqualStringScalar(col 7:string, val oscar carson)), FilterLongColEqualLongScalar(col 0:tinyint, val 10)) - predicate: (((s = 'oscar allen') or (s = 'oscar carson')) and (t = 10Y)) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col 7, values oscar allen, oscar carson), FilterLongColEqualLongScalar(col 0:tinyint, val 10)) + predicate: ((s) IN ('oscar allen', 'oscar carson') and (t = 10Y)) (type: boolean) Statistics: 
Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: UDFToByte(10) (type: tinyint), s (type: string) diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index d444ae86a1..2591c288b3 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -51,7 +51,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean) + filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -60,9 +60,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583)) - predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean) - Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583]) + predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) + Statistics: Num rows: 7 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -71,13 +71,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 17, 21] selectExpressions: 
IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string - Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -201,7 +201,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean) + filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) Statistics: Num rows: 12288 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -210,9 +210,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583)) - 
predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean) - Statistics: Num rows: 6 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583]) + predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) + Statistics: Num rows: 7 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -221,13 +221,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 18, 24] selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string - Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 2228 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 7 Data size: 2600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index ba004e9716..edbe8bd62b 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -6775,10 +6775,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n2 - filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 4 Data size: 2944 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + predicate: ((UDFToDouble(hour) = 11.0D) and (date) IN ('2008-04-08', '2008-04-09') and ds is not null and hr is not null) (type: boolean) Statistics: Num rows: 2 Data size: 1472 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) diff --git ql/src/test/results/clientpositive/pcr.q.out ql/src/test/results/clientpositive/pcr.q.out index 919b71234d..68a58bd0cc 100644 --- ql/src/test/results/clientpositive/pcr.q.out +++ ql/src/test/results/clientpositive/pcr.q.out @@ -1460,11 +1460,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-0 PREHOOK: type: QUERY POSTHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-04-08' or 
ds='2000-04-09') and key=14 order by key, value POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(14 AS INTEGER) AS `key`, `value` -FROM (SELECT `value` -FROM `default`.`pcr_t1` -WHERE (`ds` = '2000-04-08' OR `ds` = '2000-04-09') AND `key` = 14 -ORDER BY `value`) AS `t1` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1475,7 +1470,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcr_t1 - filterExpr: (((ds = '2000-04-08') or (ds = '2000-04-09')) and (key = 14)) (type: boolean) + filterExpr: ((ds) IN ('2000-04-08', '2000-04-09') and (key = 14)) (type: boolean) Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -1647,10 +1642,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04 PREHOOK: type: QUERY POSTHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04-08' or ds='2000-04-09' order by key, value POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, `value` -FROM `default`.`pcr_t1` -WHERE `ds` = '2000-04-08' OR `ds` = '2000-04-09' -ORDER BY `key`, `value` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1661,7 +1652,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcr_t1 - filterExpr: ((ds = '2000-04-08') or (ds = '2000-04-09')) (type: boolean) + filterExpr: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean) Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator @@ -2162,10 +2153,6 @@ PREHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='20 PREHOOK: type: QUERY POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, `value`, `ds` -FROM `default`.`pcr_t1` -WHERE `ds` = '2000-04-08' AND `key` = 1 OR `ds` = '2000-04-09' AND `key` = 2 
-ORDER BY `key`, `value`, `ds` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2176,22 +2163,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcr_t1 - filterExpr: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) + filterExpr: ((struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) and (struct(ds)) IN (struct('2000-04-08'), struct('2000-04-09'))) (type: boolean) Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Execution mode: vectorized @@ -2302,13 +2289,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: 
COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4919,11 +4906,6 @@ PREHOOK: query: explain extended select key, value, ds, hr from srcpart where ds PREHOOK: type: QUERY POSTHOOK: query: explain extended select key, value, ds, hr from srcpart where ds='2008-04-08' and (hr='11' or hr='12') and key=11 order by key, ds, hr POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` -FROM (SELECT `key`, `value`, `hr` -FROM `default`.`srcpart` -WHERE `ds` = '2008-04-08' AND (`hr` = '11' OR `hr` = '12') AND `key` = 11 -ORDER BY `key`, `hr`) AS `t1` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4934,7 +4916,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ((ds = '2008-04-08') and ((hr = '11') or (hr = '12')) and (UDFToDouble(key) = 11.0D)) (type: boolean) + filterExpr: ((hr) IN ('11', '12') and (ds = '2008-04-08') and (UDFToDouble(key) = 11.0D)) (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator diff --git ql/src/test/results/clientpositive/perf/spark/query13.q.out ql/src/test/results/clientpositive/perf/spark/query13.q.out index fb2a061c63..cb27d1e191 100644 --- ql/src/test/results/clientpositive/perf/spark/query13.q.out +++ ql/src/test/results/clientpositive/perf/spark/query13.q.out @@ -109,22 +109,22 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 10 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: (((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk 
is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: store + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int), hd_dep_count (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + expressions: s_store_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col3 (type: int) + 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -134,23 +134,23 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 8 Map Operator Tree: TableScan - alias: store - filterExpr: s_store_sk is not null (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: s_store_sk (type: int) - 
outputColumnNames: _col0 - Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE + expressions: hd_demo_sk (type: int), hd_dep_count (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) - 1 _col4 (type: int) + 0 _col2 (type: int) + 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -158,65 +158,33 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49) - Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 138), Reducer 3 (PARTITION-LEVEL SORT, 138) - Reducer 5 <- Map 10 (PARTITION-LEVEL SORT, 17), Reducer 4 (PARTITION-LEVEL SORT, 17) - Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 133), Map 6 (PARTITION-LEVEL SORT, 133) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 147), Reducer 2 (PARTITION-LEVEL SORT, 147) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 166), Reducer 3 (PARTITION-LEVEL SORT, 166) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 - Map Operator Tree: - TableScan - alias: customer_demographics - filterExpr: (((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and ((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: 
NONE - Select Operator - expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string) - Execution mode: vectorized - Map 2 + Map 1 Map Operator Tree: TableScan alias: store_sales - filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) + filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE + predicate: ((ss_sales_price BETWEEN 100 AND 150 or 
ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_hdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)), ss_ext_sales_price (type: decimal(7,2)), ss_ext_wholesale_cost (type: decimal(7,2)), ss_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col4 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10 - input vertices: - 0 Map 1 - Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)) + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: 
int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 7 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -235,26 +203,62 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: customer_demographics + filterExpr: ((cd_marital_status) IN ('M', 'D', 'U') and (cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + Execution mode: vectorized Map 9 Map Operator Tree: TableScan alias: customer_address - filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) 
Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ca_address_sk (type: int), ca_state (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: 
decimal(7,2)) Reducer 3 Local Work: Map Reduce Local Work @@ -265,62 +269,54 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14 + Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col4, _col6, _col7, _col8, _col9, _col10, _col14 + outputColumnNames: _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col13, _col14, _col16 input vertices: 1 Map 8 - Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col10 (type: decimal(7,2)), _col14 (type: int) + Statistics: Num rows: 255550079 Data size: 22544702224 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = 'D') and (_col14 = 'Primary') and _col6 BETWEEN 50 AND 100 and (_col16 = 1)) or ((_col13 = 'M') and (_col14 = '4 yr Degree') and _col6 BETWEEN 100 AND 150 and (_col16 = 3)) or ((_col13 = 'U') and (_col14 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200 and (_col16 = 1))) (type: boolean) + Statistics: Num rows: 10647918 Data size: 939362419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: 
Num rows: 10647918 Data size: 939362419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col6, _col7, _col8, _col9, _col10, _col14, _col16 - Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col16) IN ('KY', 'GA', 'NM') and _col10 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col10 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col10 BETWEEN 50 AND 250)) (type: boolean) - Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: int), _col7 (type: decimal(7,2)), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)), _col14 (type: int) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col6, _col7, _col8, _col9, _col14, _col19, _col20 - Statistics: Num rows: 17178642 Data size: 1515504822 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col4, _col5, _col7, _col8, _col9, _col18 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and 
(_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) (type: boolean) - Statistics: Num rows: 715776 Data size: 63145968 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col6 (type: int), _col8 (type: decimal(7,2)), _col9 (type: decimal(7,2)) - outputColumnNames: _col6, _col8, _col9 - Statistics: Num rows: 715776 Data size: 63145968 Basic stats: COMPLETE Column stats: NONE + predicate: ((((_col18 = 'KY') or (_col18 = 'GA') or (_col18 = 'NM')) and _col9 BETWEEN 100 AND 200) or (((_col18 = 'MT') or (_col18 = 'OR') or (_col18 = 'IN')) and _col9 BETWEEN 150 AND 300) or (((_col18 = 'WI') or (_col18 = 'MO') or (_col18 = 'WV')) and _col9 BETWEEN 50 AND 250)) (type: boolean) + Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col4 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col7, _col8 + input vertices: + 1 Map 10 + Statistics: Num rows: 8066665 Data size: 8186696581 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col6), count(_col6), sum(_col8), count(_col8), sum(_col9), count(_col9) + aggregations: sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col8), count(_col8) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE @@ -328,7 +324,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/perf/spark/query15.q.out ql/src/test/results/clientpositive/perf/spark/query15.q.out index 3d6fbdac77..67684f6b0b 100644 
--- ql/src/test/results/clientpositive/perf/spark/query15.q.out +++ ql/src/test/results/clientpositive/perf/spark/query15.q.out @@ -157,7 +157,7 @@ STAGE PLANS: outputColumnNames: _col3, _col4, _col7 Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) + predicate: ((_col3 = 'CA') or (_col3 = 'GA') or (_col3 = 'WA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean) Statistics: Num rows: 348467716 Data size: 47189528877 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col4 (type: string), _col7 (type: decimal(7,2)) diff --git ql/src/test/results/clientpositive/perf/spark/query34.q.out ql/src/test/results/clientpositive/perf/spark/query34.q.out index b40081e4f0..c65087ea53 100644 --- ql/src/test/results/clientpositive/perf/spark/query34.q.out +++ ql/src/test/results/clientpositive/perf/spark/query34.q.out @@ -72,15 +72,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: household_demographics - filterExpr: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) + filterExpr: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and 
(hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 600 Data size: 64200 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 600 Data size: 64200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col2 (type: int) diff --git ql/src/test/results/clientpositive/perf/spark/query48.q.out ql/src/test/results/clientpositive/perf/spark/query48.q.out index 60a4767a14..38ccff2a66 100644 --- ql/src/test/results/clientpositive/perf/spark/query48.q.out +++ ql/src/test/results/clientpositive/perf/spark/query48.q.out @@ -138,7 +138,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 9 Map Operator Tree: TableScan alias: store @@ -153,8 +153,8 @@ STAGE PLANS: Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: int) - 1 _col3 (type: int) + 0 _col3 (type: int) + 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work @@ -162,45 +162,33 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 49), Map 7 (PARTITION-LEVEL SORT, 49) - Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 55), Reducer 3 (PARTITION-LEVEL SORT, 55) - Reducer 5 <- Map 9 (PARTITION-LEVEL SORT, 138), Reducer 4 (PARTITION-LEVEL SORT, 
138) - Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 133), Map 6 (PARTITION-LEVEL SORT, 133) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 147), Reducer 2 (PARTITION-LEVEL SORT, 147) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 319), Reducer 3 (PARTITION-LEVEL SORT, 319) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: store_sales - filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) + filterExpr: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_store_sk is not null and ss_cdemo_sk is not null and ss_addr_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE + predicate: ((ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_cdemo_sk (type: int), ss_addr_sk (type: int), ss_store_sk (type: int), ss_quantity (type: int), ss_net_profit (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6 - Statistics: Num rows: 63999513 Data size: 5646055611 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col3 (type: int) - outputColumnNames: _col1, _col2, _col3, _col5, _col7 - input vertices: - 0 Map 1 - Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 70399465 Data size: 6210661306 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)) + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 191998545 Data size: 16938167362 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)) Execution mode: vectorized - Local Work: - Map Reduce Local Work - Map 7 + Map 6 Map Operator Tree: TableScan alias: date_dim @@ -219,7 +207,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan alias: customer_demographics @@ -238,77 +226,85 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: 
NONE Execution mode: vectorized - Map 9 + Map 8 Map Operator Tree: TableScan alias: customer_address - filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ca_address_sk (type: int), ca_state (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized - Reducer 3 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col7 - Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col1, _col2, _col3, _col4, _col6 + Statistics: Num rows: 
211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 77439413 Data size: 6831727584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col5 (type: int), _col7 (type: decimal(7,2)) - Reducer 4 + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 211198404 Data size: 18631984502 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)) + Reducer 3 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col7 - Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col3, _col4, _col6 + Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 85183356 Data size: 7514900505 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: int), _col7 (type: decimal(7,2)) - Reducer 5 + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 232318249 Data size: 20495183396 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)) + Reducer 4 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col5, _col7, _col14 - Statistics: Num rows: 93701693 Data size: 8266390734 Basic stats: COMPLETE 
Column stats: NONE + outputColumnNames: _col3, _col4, _col6, _col13 + Statistics: Num rows: 255550079 Data size: 22544702224 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) (type: boolean) - Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col5 (type: int) - outputColumnNames: _col5 - Statistics: Num rows: 15616947 Data size: 1377731627 Basic stats: COMPLETE Column stats: NONE + predicate: ((((_col13 = 'KY') or (_col13 = 'GA') or (_col13 = 'NM')) and _col6 BETWEEN 0 AND 2000) or (((_col13 = 'MT') or (_col13 = 'OR') or (_col13 = 'IN')) and _col6 BETWEEN 150 AND 3000) or (((_col13 = 'WI') or (_col13 = 'MO') or (_col13 = 'WV')) and _col6 BETWEEN 50 AND 25000)) (type: boolean) + Statistics: Num rows: 85183359 Data size: 7514900682 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4 + input vertices: + 1 Map 9 + Statistics: Num rows: 93701696 Data size: 8266390929 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col5) + aggregations: sum(_col4) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -316,7 +312,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/perf/spark/query53.q.out ql/src/test/results/clientpositive/perf/spark/query53.q.out index 2b1cdfea98..9959c1aa1b 100644 --- 
ql/src/test/results/clientpositive/perf/spark/query53.q.out +++ ql/src/test/results/clientpositive/perf/spark/query53.q.out @@ -86,7 +86,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 6 (PARTITION-LEVEL SORT, 403) Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529) Reducer 5 <- Reducer 4 (SORT, 1) @@ -116,20 +116,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: item - filterExpr: (((i_class) IN ('personal', 'portable', 'reference', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and (((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean) + filterExpr: (((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'reference') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or 
(i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'reference', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + predicate: (((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'reference') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class 
= 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_manufact_id (type: int) outputColumnNames: _col0, _col4 - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: int) Execution mode: vectorized Map 7 diff --git ql/src/test/results/clientpositive/perf/spark/query63.q.out ql/src/test/results/clientpositive/perf/spark/query63.q.out index b506455dbf..642c383a5d 100644 --- ql/src/test/results/clientpositive/perf/spark/query63.q.out +++ ql/src/test/results/clientpositive/perf/spark/query63.q.out @@ -88,7 +88,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 399), Map 6 (PARTITION-LEVEL SORT, 399) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 403), Map 6 (PARTITION-LEVEL SORT, 403) Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 529) Reducer 5 <- Reducer 4 (SORT, 1) @@ -118,20 +118,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: item - filterExpr: (((i_class) IN ('personal', 'portable', 'refernece', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg 
#9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and (((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and i_item_sk is not null) (type: boolean) + filterExpr: (((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'refernece') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu 
packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'refernece', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + predicate: (((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'refernece') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_manager_id (type: int) outputColumnNames: _col0, _col4 - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 115500 Data size: 165890114 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: int) Execution mode: vectorized Map 7 diff --git ql/src/test/results/clientpositive/perf/spark/query71.q.out ql/src/test/results/clientpositive/perf/spark/query71.q.out index bf9c06debf..0b353bdeb6 100644 --- ql/src/test/results/clientpositive/perf/spark/query71.q.out +++ ql/src/test/results/clientpositive/perf/spark/query71.q.out @@ -193,20 +193,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: time_dim - filterExpr: (((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null) (type: boolean) + filterExpr: ((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) (type: boolean) Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null) (type: boolean) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + predicate: ((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) (type: boolean) + Statistics: Num rows: 43200 Data size: 20347200 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: t_time_sk (type: int), t_hour (type: int), t_minute (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 43200 Data size: 20347200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 86400 Data size: 40694400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 43200 Data size: 20347200 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized Map 8 diff --git 
ql/src/test/results/clientpositive/perf/spark/query73.q.out ql/src/test/results/clientpositive/perf/spark/query73.q.out index 20ec874e88..e242fee974 100644 --- ql/src/test/results/clientpositive/perf/spark/query73.q.out +++ ql/src/test/results/clientpositive/perf/spark/query73.q.out @@ -86,15 +86,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: household_demographics - filterExpr: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) + filterExpr: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 600 Data size: 64200 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 600 Data size: 64200 Basic stats: COMPLETE Column stats: NONE Spark 
HashTable Sink Operator keys: 0 _col2 (type: int) diff --git ql/src/test/results/clientpositive/perf/spark/query85.q.out ql/src/test/results/clientpositive/perf/spark/query85.q.out index 572ba54f78..ea1e463029 100644 --- ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -166,8 +166,7 @@ limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -175,47 +174,42 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 13 + Map 14 Map Operator Tree: TableScan - alias: reason - filterExpr: r_reason_sk is not null (type: boolean) - Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + alias: web_page + filterExpr: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: r_reason_sk is not null (type: boolean) - Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + predicate: wp_web_page_sk is not null (type: boolean) + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: r_reason_sk (type: int), r_reason_desc (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE + expressions: wp_web_page_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col4 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 11 + Map 15 Map Operator Tree: TableScan - 
alias: web_page - filterExpr: wp_web_page_sk is not null (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + alias: reason + filterExpr: r_reason_sk is not null (type: boolean) + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: wp_web_page_sk is not null (type: boolean) - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + predicate: r_reason_sk is not null (type: boolean) + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: wp_web_page_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE + expressions: r_reason_sk (type: int), r_reason_desc (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col10 (type: int) + 0 _col13 (type: int) 1 _col0 (type: int) Execution mode: vectorized Local Work: @@ -224,18 +218,38 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 28), Map 9 (PARTITION-LEVEL SORT, 28) - Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 98), Reducer 2 (PARTITION-LEVEL SORT, 98) - Reducer 4 <- Map 12 (PARTITION-LEVEL SORT, 5), Reducer 3 (PARTITION-LEVEL SORT, 5) - Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 11), Reducer 4 (PARTITION-LEVEL SORT, 11) - Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 7), Reducer 5 (PARTITION-LEVEL SORT, 7) - Reducer 7 <- Reducer 6 (GROUP, 7) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 52), Map 9 (PARTITION-LEVEL SORT, 52) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 67), Reducer 2 (PARTITION-LEVEL SORT, 67) + Reducer 4 <- Map 11 (PARTITION-LEVEL SORT, 64), Reducer 3 (PARTITION-LEVEL SORT, 64) + Reducer 5 <- Map 12 (PARTITION-LEVEL SORT, 8), Reducer 4 (PARTITION-LEVEL SORT, 8) + Reducer 6 <- 
Map 13 (PARTITION-LEVEL SORT, 165), Reducer 5 (PARTITION-LEVEL SORT, 165) + Reducer 7 <- Reducer 6 (GROUP, 71) Reducer 8 <- Reducer 7 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: web_sales + filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) + Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 48000888 Data size: 6526732556 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) + Execution mode: vectorized + Map 10 + Map Operator Tree: + TableScan alias: web_returns filterExpr: (wr_item_sk is not null and wr_order_number is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not 
null and wr_refunded_addr_sk is not null and wr_reason_sk is not null) (type: boolean) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE @@ -253,103 +267,83 @@ STAGE PLANS: Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)) Execution mode: vectorized - Map 10 - Map Operator Tree: - TableScan - alias: customer_address - filterExpr: ((ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and (ca_country = 'United States') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) (type: boolean) - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ca_address_sk (type: int), ca_state (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10000000 Data size: 10148798821 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized - Map 12 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE 
Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 14 + Map 11 Map Operator Tree: TableScan alias: cd1 - filterExpr: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_marital_status is not null and cd_education_status is not null) (type: boolean) + filterExpr: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_education_status is not null and cd_marital_status is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num 
rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 15 + Map 12 Map Operator Tree: TableScan alias: cd2 - filterExpr: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_marital_status is not null and cd_education_status is not null) (type: boolean) + filterExpr: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_education_status is not null and cd_marital_status is not null) (type: boolean) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + predicate: ((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cd_demo_sk (type: int), 
cd_marital_status (type: string), cd_education_status (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 465450 Data size: 179296539 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Map 13 + Map Operator Tree: + TableScan + alias: customer_address + filterExpr: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((ca_country = 'United States') and ca_address_sk is not null) (type: boolean) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ca_address_sk (type: int), ca_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Execution mode: vectorized Map 9 Map Operator Tree: TableScan - alias: web_sales - filterExpr: ((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and 
ws_item_sk is not null and ws_order_number is not null and ws_web_page_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: date_dim + filterExpr: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) (type: boolean) - Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE + predicate: ((d_year = 1998) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_web_page_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int), _col3 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col3 (type: int) - Statistics: Num rows: 16000296 Data size: 2175577518 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), 
_col6 (type: decimal(7,2)) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Reducer 2 Reduce Operator Tree: @@ -357,116 +351,114 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col3 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14 - Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 17600325 Data size: 2393135321 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col8 (type: int), _col10 (type: int), _col12 (type: int), _col13 (type: decimal(7,2)), _col14 (type: decimal(7,2)) + key expressions: _col1 (type: int), _col3 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col3 (type: int) + Statistics: Num rows: 52800977 Data size: 7179405967 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Reducer 3 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6, _col7, _col8, _col10, _col12, _col13, _col14, _col16 - Statistics: Num rows: 19360357 Data size: 2632448910 Basic stats: 
COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col16) IN ('KY', 'GA', 'NM') and _col14 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col14 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col14 BETWEEN 50 AND 250)) (type: boolean) - Statistics: Num rows: 3226725 Data size: 438741326 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col10 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col4, _col6, _col7, _col8, _col12, _col13 - input vertices: - 1 Map 11 - Statistics: Num rows: 3549397 Data size: 482615469 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 3549397 Data size: 482615469 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2)) + 0 _col1 (type: int), _col3 (type: int) + 1 _col0 (type: int), _col5 (type: int) + outputColumnNames: _col2, _col4, _col5, _col6, _col10, _col11, _col12, _col13, _col15, _col16 + Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col10 (type: int) + sort order: + + Map-reduce partition columns: _col10 (type: int) + Statistics: Num rows: 58081075 Data size: 7897346734 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)) Reducer 4 - Local Work: - Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col8 (type: int) + 0 _col10 (type: int) 1 _col0 (type: 
int) - outputColumnNames: _col1, _col3, _col4, _col6, _col7, _col12, _col13 - Statistics: Num rows: 3904336 Data size: 530877027 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col4 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3, _col6, _col7, _col12, _col13, _col22 - input vertices: - 1 Map 13 - Statistics: Num rows: 4294769 Data size: 583964742 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col2, _col4, _col5, _col6, _col11, _col12, _col13, _col15, _col16, _col18, _col19 + Statistics: Num rows: 63889183 Data size: 8687081595 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col18 = 'D') and (_col19 = 'Primary') and _col5 BETWEEN 50 AND 100) or ((_col18 = 'M') and (_col19 = '4 yr Degree') and _col5 BETWEEN 100 AND 150) or ((_col18 = 'U') and (_col19 = 'Advanced Degree') and _col5 BETWEEN 150 AND 200)) (type: boolean) + Statistics: Num rows: 5324097 Data size: 723923250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4294769 Data size: 583964742 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col13 (type: decimal(7,2)), _col22 (type: string) + key expressions: _col12 (type: int), _col18 (type: string), _col19 (type: string) + sort order: +++ + Map-reduce partition columns: _col12 (type: int), _col18 (type: string), _col19 (type: string) + Statistics: Num rows: 5324097 Data size: 723923250 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col11 (type: int), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)) Reducer 5 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 
- 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col3, _col6, _col7, _col12, _col13, _col22, _col24, _col25 - Statistics: Num rows: 4724246 Data size: 642361230 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) (type: boolean) - Statistics: Num rows: 393687 Data size: 53530079 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int), _col24 (type: string), _col25 (type: string) - sort order: +++ - Map-reduce partition columns: _col3 (type: int), _col24 (type: string), _col25 (type: string) - Statistics: Num rows: 393687 Data size: 53530079 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: decimal(7,2)), _col7 (type: decimal(7,2)), _col12 (type: int), _col22 (type: string) + 0 _col12 (type: int), _col18 (type: string), _col19 (type: string) + 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col2, _col4, _col6, _col11, _col13, _col15, _col16 + Statistics: Num rows: 5856506 Data size: 796315592 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col11 (type: int) + sort order: + + Map-reduce partition columns: _col11 (type: int) + Statistics: Num rows: 5856506 Data size: 796315592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col4 (type: int), _col6 (type: decimal(7,2)), _col13 (type: int), _col15 (type: decimal(7,2)), _col16 (type: decimal(7,2)) Reducer 6 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int), _col24 (type: string), _col25 (type: string) - 1 _col0 (type: int), _col1 (type: string), _col2 (type: string) - outputColumnNames: 
_col6, _col7, _col12, _col22 - Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col12), count(_col12), sum(_col7), count(_col7), sum(_col6), count(_col6) - keys: _col22 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) + 0 _col11 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col4, _col6, _col13, _col15, _col16, _col24 + Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((((_col24 = 'KY') or (_col24 = 'GA') or (_col24 = 'NM')) and _col6 BETWEEN 100 AND 200) or (((_col24 = 'MT') or (_col24 = 'OR') or (_col24 = 'IN')) and _col6 BETWEEN 150 AND 300) or (((_col24 = 'WI') or (_col24 = 'MO') or (_col24 = 'WV')) and _col6 BETWEEN 50 AND 250)) (type: boolean) + Statistics: Num rows: 7333332 Data size: 7442451276 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col13, _col15, _col16 + input vertices: + 1 Map 14 + Statistics: Num rows: 8066665 Data size: 8186696581 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col13 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col4, _col15, _col16, _col28 + input vertices: + 1 Map 15 + Statistics: Num rows: 8873331 Data size: 9005366434 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4), count(_col4), sum(_col16), count(_col16), sum(_col15), count(_col15) + keys: _col28 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 8873331 Data size: 9005366434 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) Reducer 7 Execution mode: vectorized Reduce Operator Tree: @@ -475,15 +467,15 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (_col1 / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), substr(_col0, 1, 20) (type: string) outputColumnNames: _col4, _col5, _col6, _col7 - Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col7 (type: string), _col4 (type: double), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22)) sort order: ++++ - Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 8 Execution mode: vectorized @@ -491,13 +483,13 
@@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(37,22)), KEY.reducesinkkey3 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4436665 Data size: 4502682709 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 38500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 101400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/perf/spark/query89.q.out ql/src/test/results/clientpositive/perf/spark/query89.q.out index 1acc577669..96333a21f3 100644 --- ql/src/test/results/clientpositive/perf/spark/query89.q.out +++ ql/src/test/results/clientpositive/perf/spark/query89.q.out @@ -86,8 +86,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 400), Map 7 (PARTITION-LEVEL SORT, 400) - Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 438), Reducer 2 (PARTITION-LEVEL SORT, 438) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 398), Map 7 (PARTITION-LEVEL SORT, 398) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 442), Reducer 2 (PARTITION-LEVEL SORT, 442) Reducer 4 <- Reducer 3 (GROUP, 529) Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 265) Reducer 6 <- Reducer 5 (SORT, 1) @@ -107,33 +107,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) - Execution mode: vectorized - Map 7 - Map Operator Tree: - TableScan - alias: item - filterExpr: (((i_class) IN ('wallpaper', 'parenting', 'musical') or (i_class) IN ('womens', 'birdal', 'pants')) and ((i_category) IN ('Home', 'Books', 'Electronics') or (i_category) IN ('Shoes', 'Jewelry', 'Men')) and (((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and ((i_category) IN ('Home', 'Books', 'Electronics') or (i_category) IN ('Shoes', 'Jewelry', 'Men')) and ((i_class) IN ('wallpaper', 'parenting', 'musical') or (i_class) IN ('womens', 'birdal', 'pants')) and i_item_sk is not null) (type: boolean) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: 
NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized - Map 8 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -153,22 +133,42 @@ STAGE PLANS: Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: int) Execution mode: vectorized + Map 8 + Map Operator Tree: + TableScan + alias: item + filterExpr: (((((i_category = 'Home') or (i_category = 'Books') or (i_category = 'Electronics')) and ((i_class = 'wallpaper') or (i_class = 'parenting') or (i_class = 'musical'))) or (((i_category = 'Shoes') or (i_category = 'Jewelry') or (i_category = 'Men')) and ((i_class = 'womens') or (i_class = 'birdal') or (i_class = 'pants')))) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((((i_category = 'Home') or (i_category = 'Books') or (i_category = 'Electronics')) and ((i_class = 'wallpaper') or (i_class = 'parenting') or (i_class = 'musical'))) or (((i_category = 'Shoes') or (i_category = 'Jewelry') or (i_category = 'Men')) and ((i_class = 'womens') or (i_class = 'birdal') or (i_class = 'pants')))) and i_item_sk is not null) (type: boolean) + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: int), i_brand (type: string), i_class (type: string), i_category (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 
462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5, _col6, _col7 + outputColumnNames: _col1, _col2, _col3, _col6 Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col5 (type: string), _col6 (type: string), _col7 (type: string) + value expressions: _col2 (type: int), _col3 (type: decimal(7,2)), _col6 (type: int) Reducer 3 Local Work: Map Reduce Local Work @@ -177,9 +177,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col5, _col6, _col7, _col10 + outputColumnNames: _col2, _col3, _col6, _col8, _col9, _col10 Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -187,20 +187,20 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col5, _col6, _col7, _col10, _col12, _col13 + outputColumnNames: _col3, _col6, _col8, _col9, _col10, _col12, _col13 input vertices: 1 Map 9 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) - keys: _col5 (type: string), _col6 (type: string), _col7 (type: string), _col10 (type: int), _col12 (type: 
string), _col13 (type: string) + keys: _col6 (type: int), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col12 (type: string), _col13 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string) + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE value expressions: _col6 (type: decimal(17,2)) Reducer 4 @@ -208,33 +208,33 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: string) + keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string), KEY._col5 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) + key expressions: _col3 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) 
sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string) + Map-reduce partition columns: _col3 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)) + value expressions: _col0 (type: int), _col2 (type: string), _col6 (type: decimal(17,2)) Reducer 5 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey1 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: int), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2)) + expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), VALUE._col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: string, _col1: string, _col2: string, _col3: int, _col4: string, _col5: string, _col6: decimal(17,2) + output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: string, _col6: decimal(17,2) type: WINDOWING Windowing table definition input alias: ptf_1 name: windowingtablefunction - order by: _col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST - partition by: _col2, _col0, _col4, _col5 + order by: _col3 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST + partition by: _col3, _col1, _col4, _col5 raw input shape: window functions: window function definition @@ 
-245,14 +245,14 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) + expressions: avg_window_0 (type: decimal(21,6)), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: decimal(17,2)) outputColumnNames: avg_window_0, _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: CASE WHEN ((avg_window_0 <> 0)) THEN (((abs((_col6 - avg_window_0)) / avg_window_0) > 0.1)) ELSE (null) END (type: boolean) Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col0 (type: string), _col4 (type: string), _col5 (type: string), _col3 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6)) + expressions: _col3 (type: string), _col2 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col0 (type: int), _col6 (type: decimal(17,2)), avg_window_0 (type: decimal(21,6)), (_col6 - avg_window_0) (type: decimal(22,6)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/perf/spark/query91.q.out ql/src/test/results/clientpositive/perf/spark/query91.q.out index de8977da51..c76a1d5ac4 100644 --- ql/src/test/results/clientpositive/perf/spark/query91.q.out +++ 
ql/src/test/results/clientpositive/perf/spark/query91.q.out @@ -69,19 +69,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 7 + Map 13 Map Operator Tree: TableScan - alias: call_center - filterExpr: cc_call_center_sk is not null (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + alias: household_demographics + filterExpr: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: cc_call_center_sk is not null (type: boolean) - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + predicate: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cc_call_center_sk (type: int), cc_call_center_id (type: string), cc_name (type: string), cc_manager (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE + expressions: hd_demo_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col2 (type: int) @@ -94,19 +94,19 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 13 + Map 12 Map Operator Tree: TableScan - alias: household_demographics - filterExpr: ((hd_buy_potential like '0-500%') and hd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE + alias: call_center + filterExpr: cc_call_center_sk is not null (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((hd_buy_potential like '0-500%') and hd_demo_sk is not 
null) (type: boolean) - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + predicate: cc_call_center_sk is not null (type: boolean) + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: hd_demo_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE + expressions: cc_call_center_sk (type: int), cc_call_center_id (type: string), cc_name (type: string), cc_manager (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col2 (type: int) @@ -118,55 +118,74 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 750), Reducer 9 (PARTITION-LEVEL SORT, 750) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 25), Map 6 (PARTITION-LEVEL SORT, 25) - Reducer 3 <- Reducer 10 (PARTITION-LEVEL SORT, 745), Reducer 2 (PARTITION-LEVEL SORT, 745) - Reducer 4 <- Reducer 3 (GROUP, 787) - Reducer 5 <- Reducer 4 (SORT, 1) - Reducer 9 <- Map 11 (PARTITION-LEVEL SORT, 541), Map 8 (PARTITION-LEVEL SORT, 541) + Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 25), Map 9 (PARTITION-LEVEL SORT, 25) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 538), Map 7 (PARTITION-LEVEL SORT, 538) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 750), Reducer 2 (PARTITION-LEVEL SORT, 750) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 680), Reducer 3 (PARTITION-LEVEL SORT, 680) + Reducer 5 <- Reducer 4 (GROUP, 787) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_returns - filterExpr: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + 
alias: customer + filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) (type: boolean) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2)) + expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + Execution mode: vectorized + Map 11 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: 
boolean) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) + Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 11 + Map 7 Map Operator Tree: TableScan alias: customer_demographics - filterExpr: (((cd_education_status = 'Unknown') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'W')) and (((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree'))) and cd_demo_sk is not null) (type: boolean) + filterExpr: ((cd_education_status) IN ('Unknown', 'Advanced Degree') and (cd_marital_status) IN ('M', 'W') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree')) and cd_demo_sk is not null) (type: boolean) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree'))) and ((cd_education_status = 'Unknown') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'W')) and cd_demo_sk is not null) (type: boolean) - Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE + predicate: ((cd_education_status) IN ('Unknown', 
'Advanced Degree') and (cd_marital_status) IN ('M', 'W') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree')) and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 116363 Data size: 44824327 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cd_demo_sk (type: int), cd_marital_status (type: string), cd_education_status (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116363 Data size: 44824327 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 930900 Data size: 358593079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116363 Data size: 44824327 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: vectorized - Map 12 + Map 8 Map Operator Tree: TableScan alias: customer_address @@ -185,44 +204,25 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized - Map 6 + Map 9 Map Operator Tree: TableScan - alias: date_dim - filterExpr: ((d_year = 1999) and (d_moy = 11) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE + alias: catalog_returns + filterExpr: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 18262 Data size: 20435178 Basic 
stats: COMPLETE Column stats: NONE + predicate: (cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) (type: boolean) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE + expressions: cr_returned_date_sk (type: int), cr_returning_customer_sk (type: int), cr_call_center_sk (type: int), cr_net_loss (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 8 - Map Operator Tree: - TableScan - alias: customer - filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) (type: boolean) - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_hdemo_sk (type: int), c_current_addr_sk (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - 
Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized Reducer 10 Local Work: @@ -232,38 +232,6 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col5, _col6 - Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col5, _col6 - input vertices: - 1 Map 13 - Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col2, _col7, _col8 - Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: string), _col8 (type: string) - Reducer 2 - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 @@ -276,7 +244,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col3, _col8, _col9, _col10 input vertices: - 1 Map 7 + 1 Map 12 Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) @@ -284,29 +252,73 @@ 
STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 34846646 Data size: 3699254122 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: decimal(7,2)), _col8 (type: string), _col9 (type: string), _col10 (type: string) - Reducer 3 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: int) - 1 _col2 (type: int) - outputColumnNames: _col3, _col8, _col9, _col10, _col18, _col19 - Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col3) - keys: _col8 (type: string), _col9 (type: string), _col10 (type: string), _col18 (type: string), _col19 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col5, _col6 + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int), _col5 (type: string), _col6 (type: string) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col5, _col6 + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 96800003 Data size: 83249958789 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col5 (type: string), _col6 (type: string) + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator 
+ condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2, _col5, _col6, _col12, _col17, _col18, _col19 + Statistics: Num rows: 106480005 Data size: 91574956652 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col5, _col6, _col12, _col17, _col18, _col19 + input vertices: + 1 Map 13 Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) - sort order: +++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Group By Operator + aggregations: sum(_col12) + keys: _col5 (type: string), _col6 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(17,2)) - Reducer 4 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + sort order: +++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + Statistics: Num rows: 117128008 Data size: 100732454500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col5 (type: decimal(17,2)) + Reducer 5 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -316,7 +328,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: 
NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: decimal(17,2)) + expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -324,7 +336,7 @@ STAGE PLANS: sort order: - Statistics: Num rows: 58564004 Data size: 50366227250 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Reducer 5 + Reducer 6 Execution mode: vectorized Reduce Operator Tree: Select Operator @@ -338,22 +350,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2, _col3, _col5, _col6 - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int), _col5 (type: string), _col6 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/perf/tez/query13.q.out ql/src/test/results/clientpositive/perf/tez/query13.q.out index 5cd4e27de3..05ca42188b 100644 --- ql/src/test/results/clientpositive/perf/tez/query13.q.out +++ ql/src/test/results/clientpositive/perf/tez/query13.q.out @@ -101,18 +101,18 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 9 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Map 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -127,142 +127,140 @@ Stage-0 <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_37] Group By Operator [GBY_36] (rows=1 width=256) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","count(_col6)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)"] - Select Operator [SEL_35] (rows=715776 width=88) - Output:["_col6","_col8","_col9"] - Filter Operator [FIL_34] (rows=715776 width=88) - predicate:(((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) - Merge Join Operator [MERGEJOIN_121] (rows=17178642 width=88) - Conds:RS_31._col2=RS_156._col0(Inner),Output:["_col6","_col7","_col8","_col9","_col14","_col19","_col20"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] - PartitionCols:_col0 - Select Operator 
[SEL_155] (rows=1861800 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_154] (rows=1861800 width=385) - predicate:(((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null) - TableScan [TS_15] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col2 - Filter Operator [FIL_30] (rows=15616947 width=88) - predicate:(((_col16) IN ('KY', 'GA', 'NM') and _col10 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col10 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col10 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_120] (rows=93701693 width=88) - Conds:RS_27._col4=RS_148._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10","_col14","_col16"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] - PartitionCols:_col0 - Select Operator [SEL_147] (rows=10000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_146] (rows=10000000 width=1014) - predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_119] (rows=85183356 width=88) - Conds:RS_24._col3=RS_140._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8","_col9","_col10","_col14"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_140] - PartitionCols:_col0 - Select Operator [SEL_139] (rows=7200 width=107) - Output:["_col0","_col1"] - 
Filter Operator [FIL_138] (rows=7200 width=107) - predicate:(((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) - TableScan [TS_9] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_118] (rows=77439413 width=88) - Conds:RS_21._col1=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] - PartitionCols:_col0 - Select Operator [SEL_131] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_130] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_117] (rows=70399465 width=88) - Conds:RS_124._col0=RS_164._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Map 1 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_124] - PartitionCols:_col0 - Select Operator [SEL_123] (rows=1704 width=1910) - Output:["_col0"] - Filter Operator [FIL_122] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_0] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] - PartitionCols:_col4 - Select Operator [SEL_163] (rows=63999513 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter Operator [FIL_162] (rows=63999513 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_28_customer_address_ca_address_sk_min) AND DynamicValue(RS_28_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, 
DynamicValue(RS_28_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_32_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_32_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_32_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_18_store_s_store_sk_min) AND DynamicValue(RS_18_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_18_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_137] - Group By Operator [GBY_136] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] - Group By Operator [GBY_134] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_133] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_131] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_145] - Group By Operator [GBY_144] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_143] - Group By Operator [GBY_142] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_141] (rows=7200 width=107) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_139] - <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_153] - Group By Operator [GBY_152] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=10000000)"] - <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] - Group By Operator [GBY_150] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=10000000)"] - Select Operator [SEL_149] (rows=10000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_147] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_161] - Group By Operator [GBY_160] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] - Group By Operator [GBY_158] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_157] 
(rows=1861800 width=385) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_155] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_127] - Group By Operator [GBY_126] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_125] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_123] + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col8)","count(_col8)"] + Merge Join Operator [MERGEJOIN_121] (rows=8066665 width=1014) + Conds:RS_32._col4=RS_156._col0(Inner),Output:["_col5","_col7","_col8"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + PartitionCols:_col0 + Select Operator [SEL_155] (rows=1704 width=1910) + Output:["_col0"] + Filter Operator [FIL_154] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_15] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col4 + Filter Operator [FIL_31] (rows=7333332 width=1014) + predicate:((((_col18 = 'KY') or (_col18 = 'GA') or (_col18 = 'NM')) and _col9 BETWEEN 100 AND 200) or (((_col18 = 'MT') or (_col18 = 'OR') or (_col18 = 'IN')) and _col9 BETWEEN 150 AND 300) or (((_col18 = 'WI') or (_col18 = 'MO') or (_col18 = 'WV')) and _col9 BETWEEN 50 AND 250)) + Merge Join Operator [MERGEJOIN_120] (rows=22000000 width=1014) + Conds:RS_28._col3=RS_148._col0(Inner),Output:["_col4","_col5","_col7","_col8","_col9","_col18"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_148] + 
PartitionCols:_col0 + Select Operator [SEL_147] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_146] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and ca_address_sk is not null) + TableScan [TS_12] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col3 + Filter Operator [FIL_27] (rows=10647918 width=88) + predicate:(((_col13 = 'D') and (_col14 = 'Primary') and _col6 BETWEEN 50 AND 100 and (_col16 = 1)) or ((_col13 = 'M') and (_col14 = '4 yr Degree') and _col6 BETWEEN 100 AND 150 and (_col16 = 3)) or ((_col13 = 'U') and (_col14 = 'Advanced Degree') and _col6 BETWEEN 150 AND 200 and (_col16 = 1))) + Merge Join Operator [MERGEJOIN_119] (rows=255550079 width=88) + Conds:RS_24._col2=RS_140._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14","_col16"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_140] + PartitionCols:_col0 + Select Operator [SEL_139] (rows=3600 width=107) + Output:["_col0","_col1"] + Filter Operator [FIL_138] (rows=3600 width=107) + predicate:((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) + TableScan [TS_9] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_118] (rows=232318249 width=88) + Conds:RS_21._col1=RS_132._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col13","_col14"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_132] + PartitionCols:_col0 + Select Operator [SEL_131] (rows=465450 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_130] (rows=465450 width=385) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') 
and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_6] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_117] (rows=211198404 width=88) + Conds:RS_164._col0=RS_124._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_124] + PartitionCols:_col0 + Select Operator [SEL_123] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_122] (rows=36524 width=1119) + predicate:((d_year = 2001) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_164] + PartitionCols:_col0 + Select Operator [SEL_163] (rows=191998545 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Filter Operator [FIL_162] (rows=191998545 width=88) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_29_customer_address_ca_address_sk_min) AND DynamicValue(RS_29_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_29_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_25_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_25_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_25_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or 
ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_33_store_s_store_sk_min) AND DynamicValue(RS_33_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_33_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_137] + Group By Operator [GBY_136] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_135] + Group By Operator [GBY_134] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_133] (rows=465450 width=385) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_131] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_145] + Group By Operator [GBY_144] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_143] + Group By Operator [GBY_142] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_141] (rows=3600 
width=107) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_139] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_153] + Group By Operator [GBY_152] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] + <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] + Select Operator [SEL_149] (rows=20000000 width=1014) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_147] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_161] + Group By Operator [GBY_160] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_159] + Group By Operator [GBY_158] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_157] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_155] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_129] + Group By Operator [GBY_128] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_127] + Group By Operator [GBY_126] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_125] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_123] diff --git 
ql/src/test/results/clientpositive/perf/tez/query15.q.out ql/src/test/results/clientpositive/perf/tez/query15.q.out index 3c7ae664b1..e1eca99d95 100644 --- ql/src/test/results/clientpositive/perf/tez/query15.q.out +++ ql/src/test/results/clientpositive/perf/tez/query15.q.out @@ -71,7 +71,7 @@ Stage-0 Select Operator [SEL_23] (rows=348467716 width=135) Output:["_col4","_col7"] Filter Operator [FIL_22] (rows=348467716 width=135) - predicate:((_col3) IN ('CA', 'WA', 'GA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) + predicate:((_col3 = 'CA') or (_col3 = 'GA') or (_col3 = 'WA') or (_col7 > 500) or (substr(_col4, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) Merge Join Operator [MERGEJOIN_77] (rows=348467716 width=135) Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col3","_col4","_col7"] <-Reducer 2 [SIMPLE_EDGE] diff --git ql/src/test/results/clientpositive/perf/tez/query34.q.out ql/src/test/results/clientpositive/perf/tez/query34.q.out index 9b7b482d3b..93e5324bcc 100644 --- ql/src/test/results/clientpositive/perf/tez/query34.q.out +++ ql/src/test/results/clientpositive/perf/tez/query34.q.out @@ -129,10 +129,10 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_11] (rows=1200 width=107) + Select Operator [SEL_11] (rows=600 width=107) Output:["_col0"] - Filter Operator [FIL_55] (rows=1200 width=107) - predicate:(((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) + Filter Operator [FIL_55] (rows=600 width=107) + predicate:((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 
1.2D)) ELSE (null) END and hd_demo_sk is not null) TableScan [TS_9] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] <-Reducer 6 [SIMPLE_EDGE] @@ -177,7 +177,7 @@ Stage-0 SHUFFLE [RS_69] Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_67] (rows=1200 width=107) + Select Operator [SEL_67] (rows=600 width=107) Output:["_col0"] Please refer to the previous Select Operator [SEL_11] <-Reducer 15 [BROADCAST_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query48.q.out ql/src/test/results/clientpositive/perf/tez/query48.q.out index 1cf8d5c0da..7b0ce1c046 100644 --- ql/src/test/results/clientpositive/perf/tez/query48.q.out +++ ql/src/test/results/clientpositive/perf/tez/query48.q.out @@ -131,16 +131,16 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator @@ -153,115 +153,113 @@ Stage-0 <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_30] Group By Operator [GBY_29] (rows=1 width=8) - Output:["_col0"],aggregations:["sum(_col5)"] - Select Operator [SEL_28] (rows=15616947 width=88) - Output:["_col5"] - Filter Operator [FIL_27] (rows=15616947 width=88) - predicate:(((_col14) IN ('KY', 'GA', 'NM') and _col7 BETWEEN 0 AND 2000) or ((_col14) IN ('MT', 'OR', 'IN') and _col7 BETWEEN 150 AND 3000) or ((_col14) IN ('WI', 'MO', 'WV') and _col7 BETWEEN 50 AND 25000)) - Merge Join Operator [MERGEJOIN_96] (rows=93701693 width=88) - Conds:RS_24._col3=RS_123._col0(Inner),Output:["_col5","_col7","_col14"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] - PartitionCols:_col0 - Select Operator [SEL_122] (rows=10000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_121] (rows=10000000 width=1014) - predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) - TableScan [TS_12] (rows=40000000 width=1014) - 
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_95] (rows=85183356 width=88) - Conds:RS_21._col2=RS_115._col0(Inner),Output:["_col3","_col5","_col7"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] - PartitionCols:_col0 - Select Operator [SEL_114] (rows=465450 width=385) - Output:["_col0"] - Filter Operator [FIL_113] (rows=465450 width=385) - predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) - TableScan [TS_9] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_94] (rows=77439413 width=88) - Conds:RS_18._col1=RS_107._col0(Inner),Output:["_col2","_col3","_col5","_col7"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] - PartitionCols:_col0 - Select Operator [SEL_106] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_105] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_93] (rows=70399465 width=88) - Conds:RS_99._col0=RS_131._col3(Inner),Output:["_col1","_col2","_col3","_col5","_col7"] - <-Map 1 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_99] - PartitionCols:_col0 - Select Operator [SEL_98] (rows=1704 width=1910) - Output:["_col0"] - Filter Operator [FIL_97] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_0] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] - <-Map 8 [SIMPLE_EDGE] 
vectorized - SHUFFLE [RS_131] - PartitionCols:_col3 - Select Operator [SEL_130] (rows=63999513 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Filter Operator [FIL_129] (rows=63999513 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_25_customer_address_ca_address_sk_min) AND DynamicValue(RS_25_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_25_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_22_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_22_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_22_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_15_store_s_store_sk_min) AND DynamicValue(RS_15_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_15_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_112] - Group By Operator [GBY_111] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - Group By 
Operator [GBY_109] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=36524 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_106] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_120] - Group By Operator [GBY_119] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] - Group By Operator [GBY_117] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_116] (rows=465450 width=385) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_114] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_128] - Group By Operator [GBY_127] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=10000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] - Group By Operator [GBY_125] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=10000000)"] - Select Operator [SEL_124] (rows=10000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_122] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_104] - Group By Operator [GBY_103] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_102] - Group By Operator [GBY_101] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_100] (rows=1704 width=1910) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_98] + Output:["_col0"],aggregations:["sum(_col4)"] + Merge Join Operator [MERGEJOIN_96] (rows=93701696 width=88) + Conds:RS_25._col3=RS_123._col0(Inner),Output:["_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_123] + PartitionCols:_col0 + Select Operator [SEL_122] (rows=1704 width=1910) + Output:["_col0"] + Filter Operator [FIL_121] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_12] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col3 + Filter Operator [FIL_24] (rows=85183359 width=88) + predicate:((((_col13 = 'KY') or (_col13 = 'GA') or (_col13 = 'NM')) and _col6 BETWEEN 0 AND 2000) or (((_col13 = 'MT') or (_col13 = 'OR') or (_col13 = 'IN')) and _col6 BETWEEN 150 AND 3000) or (((_col13 = 'WI') or (_col13 = 'MO') or (_col13 = 'WV')) and _col6 BETWEEN 50 AND 25000)) + Merge Join Operator [MERGEJOIN_95] (rows=255550079 width=88) + Conds:RS_21._col2=RS_115._col0(Inner),Output:["_col3","_col4","_col6","_col13"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] + PartitionCols:_col0 + Select Operator [SEL_114] (rows=20000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_113] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and ca_address_sk is not null) + TableScan [TS_9] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_94] (rows=232318249 width=88) + Conds:RS_18._col1=RS_107._col0(Inner),Output:["_col2","_col3","_col4","_col6"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=465450 
width=385) + Output:["_col0"] + Filter Operator [FIL_105] (rows=465450 width=385) + predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M') and cd_demo_sk is not null) + TableScan [TS_6] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_93] (rows=211198404 width=88) + Conds:RS_131._col0=RS_99._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_99] + PartitionCols:_col0 + Select Operator [SEL_98] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_97] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_131] + PartitionCols:_col0 + Select Operator [SEL_130] (rows=191998545 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col6"] + Filter Operator [FIL_129] (rows=191998545 width=88) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_22_customer_address_ca_address_sk_min) AND DynamicValue(RS_22_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_22_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_19_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_19_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_19_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_26_store_s_store_sk_min) AND DynamicValue(RS_26_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_26_store_s_store_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_108] (rows=465450 width=385) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_106] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_120] + Group By Operator [GBY_119] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] + Select Operator [SEL_116] (rows=20000000 width=1014) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_114] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_128] + Group By Operator [GBY_127] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_126] + Group By Operator [GBY_125] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_124] (rows=1704 width=1910) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_122] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_104] + Group By Operator [GBY_103] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_102] + Group By Operator [GBY_101] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_100] (rows=36524 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_98] diff --git ql/src/test/results/clientpositive/perf/tez/query53.q.out ql/src/test/results/clientpositive/perf/tez/query53.q.out index 3567534ac4..29d8e6ed7c 100644 --- ql/src/test/results/clientpositive/perf/tez/query53.q.out +++ ql/src/test/results/clientpositive/perf/tez/query53.q.out @@ -127,10 +127,10 @@ Stage-0 <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=115500 width=1436) + Select Operator [SEL_86] (rows=462000 width=1436) Output:["_col0","_col4"] - Filter Operator [FIL_85] (rows=115500 width=1436) - predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'reference', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 
'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'reference', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null) + Filter Operator [FIL_85] (rows=462000 width=1436) + predicate:(((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'reference') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and i_item_sk is not null) TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_manufact_id"] <-Map 1 [SIMPLE_EDGE] vectorized @@ -172,7 +172,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=115500 width=1436) + Select Operator [SEL_88] (rows=462000 width=1436) Output:["_col0"] Please refer to the previous 
Select Operator [SEL_86] diff --git ql/src/test/results/clientpositive/perf/tez/query63.q.out ql/src/test/results/clientpositive/perf/tez/query63.q.out index a5b7b5a788..3e99050263 100644 --- ql/src/test/results/clientpositive/perf/tez/query63.q.out +++ ql/src/test/results/clientpositive/perf/tez/query63.q.out @@ -129,10 +129,10 @@ Stage-0 <-Map 7 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=115500 width=1436) + Select Operator [SEL_86] (rows=462000 width=1436) Output:["_col0","_col4"] - Filter Operator [FIL_85] (rows=115500 width=1436) - predicate:((((i_category) IN ('Books', 'Children', 'Electronics') and (i_class) IN ('personal', 'portable', 'refernece', 'self-help') and (i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9')) or ((i_category) IN ('Women', 'Music', 'Men') and (i_class) IN ('accessories', 'classical', 'fragrances', 'pants') and (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1'))) and ((i_brand) IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 'exportiunivamalg #9', 'scholaramalgamalg #9') or (i_brand) IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 'importoamalg #1')) and ((i_category) IN ('Books', 'Children', 'Electronics') or (i_category) IN ('Women', 'Music', 'Men')) and ((i_class) IN ('personal', 'portable', 'refernece', 'self-help') or (i_class) IN ('accessories', 'classical', 'fragrances', 'pants')) and i_item_sk is not null) + Filter Operator [FIL_85] (rows=462000 width=1436) + predicate:(((((i_category = 'Books') or (i_category = 'Children') or (i_category = 'Electronics')) and ((i_class = 'personal') or (i_class = 'portable') or (i_class = 'refernece') or (i_class = 'self-help')) and ((i_brand = 'scholaramalgamalg #14') or (i_brand = 'scholaramalgamalg #7') or (i_brand = 'exportiunivamalg #9') or (i_brand = 'scholaramalgamalg #9'))) or (((i_category = 
'Women') or (i_category = 'Music') or (i_category = 'Men')) and ((i_class = 'accessories') or (i_class = 'classical') or (i_class = 'fragrances') or (i_class = 'pants')) and ((i_brand = 'amalgimporto #1') or (i_brand = 'edu packscholar #1') or (i_brand = 'exportiimporto #1') or (i_brand = 'importoamalg #1')))) and i_item_sk is not null) TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_manager_id"] <-Map 1 [SIMPLE_EDGE] vectorized @@ -174,7 +174,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=115500 width=1436) + Select Operator [SEL_88] (rows=462000 width=1436) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] diff --git ql/src/test/results/clientpositive/perf/tez/query71.q.out ql/src/test/results/clientpositive/perf/tez/query71.q.out index 4521aabc9f..95d7cd89b0 100644 --- ql/src/test/results/clientpositive/perf/tez/query71.q.out +++ ql/src/test/results/clientpositive/perf/tez/query71.q.out @@ -117,10 +117,10 @@ Stage-0 <-Map 20 [SIMPLE_EDGE] vectorized SHUFFLE [RS_173] PartitionCols:_col0 - Select Operator [SEL_172] (rows=86400 width=471) + Select Operator [SEL_172] (rows=43200 width=471) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_171] (rows=86400 width=471) - predicate:(((t_meal_time = 'breakfast') or (t_meal_time = 'dinner')) and t_time_sk is not null) + Filter Operator [FIL_171] (rows=43200 width=471) + predicate:((t_meal_time) IN ('breakfast', 'dinner') and t_time_sk is not null) TableScan [TS_35] (rows=86400 width=471) default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute","t_meal_time"] <-Reducer 4 [SIMPLE_EDGE] @@ -182,7 +182,7 @@ Stage-0 SHUFFLE [RS_176] Group By Operator [GBY_175] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_174] (rows=86400 width=471) + Select Operator [SEL_174] (rows=43200 width=471) Output:["_col0"] Please refer to the previous Select Operator [SEL_172] <-Reducer 13 [BROADCAST_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query73.q.out ql/src/test/results/clientpositive/perf/tez/query73.q.out index cfa5213b5e..d62144e439 100644 --- ql/src/test/results/clientpositive/perf/tez/query73.q.out +++ ql/src/test/results/clientpositive/perf/tez/query73.q.out @@ -123,10 +123,10 @@ Stage-0 <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_11] (rows=1200 width=107) + Select Operator [SEL_11] (rows=600 width=107) Output:["_col0"] - Filter Operator [FIL_55] (rows=1200 width=107) - predicate:(((hd_buy_potential = '>10000') or (hd_buy_potential = 'unknown')) and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) + Filter Operator [FIL_55] (rows=600 width=107) + predicate:((hd_buy_potential) IN ('>10000', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.0D)) ELSE (null) END and hd_demo_sk is not null) TableScan [TS_9] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential","hd_dep_count","hd_vehicle_count"] <-Reducer 6 [SIMPLE_EDGE] @@ -171,7 +171,7 @@ Stage-0 SHUFFLE [RS_69] Group By Operator [GBY_68] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_67] (rows=1200 width=107) + Select Operator [SEL_67] (rows=600 width=107) Output:["_col0"] Please refer to the previous 
Select Operator [SEL_11] <-Reducer 15 [BROADCAST_EDGE] vectorized diff --git ql/src/test/results/clientpositive/perf/tez/query85.q.out ql/src/test/results/clientpositive/perf/tez/query85.q.out index 4e42d69735..06c6bdd502 100644 --- ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -167,16 +167,16 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 11 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE) +Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) Reducer 10 <- Reducer 9 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 11 (SIMPLE_EDGE) -Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Map 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 15 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 16 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 18 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -186,144 +186,144 @@ Stage-0 Stage-1 Reducer 10 vectorized File Output Operator [FS_244] - Limit [LIM_243] (rows=100 width=385) + Limit [LIM_243] (rows=100 width=1014) Number of rows:100 - Select Operator [SEL_242] (rows=1023990 width=385) + Select Operator [SEL_242] (rows=4436665 width=1014) Output:["_col0","_col1","_col2","_col3"] <-Reducer 9 [SIMPLE_EDGE] vectorized SHUFFLE [RS_241] - Select Operator [SEL_240] (rows=1023990 width=385) + 
Select Operator [SEL_240] (rows=4436665 width=1014) Output:["_col4","_col5","_col6","_col7"] - Group By Operator [GBY_239] (rows=1023990 width=385) + Group By Operator [GBY_239] (rows=4436665 width=1014) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 - Group By Operator [GBY_48] (rows=2047980 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col22 - Merge Join Operator [MERGEJOIN_206] (rows=2047980 width=385) - Conds:RS_44._col3, _col24, _col25=RS_237._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col22"] + Group By Operator [GBY_48] (rows=8873331 width=1014) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","count(_col4)","sum(_col16)","count(_col16)","sum(_col15)","count(_col15)"],keys:_col28 + Merge Join Operator [MERGEJOIN_206] (rows=8873331 width=1014) + Conds:RS_44._col13=RS_238._col0(Inner),Output:["_col4","_col15","_col16","_col28"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] - PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_236] (rows=1861800 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_235] (rows=1861800 width=385) - predicate:(((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null and cd_education_status is not null and cd_marital_status is not null) - TableScan [TS_21] (rows=1861800 width=385) - default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + 
SHUFFLE [RS_238] + PartitionCols:_col0 + Select Operator [SEL_237] (rows=72 width=200) + Output:["_col0","_col1"] + Filter Operator [FIL_236] (rows=72 width=200) + predicate:r_reason_sk is not null + TableScan [TS_21] (rows=72 width=200) + default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_44] - PartitionCols:_col3, _col24, _col25 - Filter Operator [FIL_43] (rows=393687 width=135) - predicate:(((_col24 = 'D') and (_col25 = 'Primary') and _col13 BETWEEN 50 AND 100) or ((_col24 = 'M') and (_col25 = '4 yr Degree') and _col13 BETWEEN 100 AND 150) or ((_col24 = 'U') and (_col25 = 'Advanced Degree') and _col13 BETWEEN 150 AND 200)) - Merge Join Operator [MERGEJOIN_205] (rows=4724246 width=135) - Conds:RS_40._col1=RS_238._col0(Inner),Output:["_col3","_col6","_col7","_col12","_col13","_col22","_col24","_col25"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_236] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_204] (rows=4294769 width=135) - Conds:RS_37._col4=RS_234._col0(Inner),Output:["_col1","_col3","_col6","_col7","_col12","_col13","_col22"] - <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + PartitionCols:_col13 + Merge Join Operator [MERGEJOIN_205] (rows=8066665 width=1014) + Conds:RS_41._col2=RS_217._col0(Inner),Output:["_col4","_col13","_col15","_col16"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_217] + PartitionCols:_col0 + Select Operator [SEL_216] (rows=4602 width=585) + Output:["_col0"] + Filter Operator [FIL_215] (rows=4602 width=585) + predicate:wp_web_page_sk is not null + TableScan [TS_18] (rows=4602 width=585) + default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col2 + Filter Operator [FIL_40] (rows=7333332 width=1014) + predicate:((((_col24 = 'KY') or (_col24 
= 'GA') or (_col24 = 'NM')) and _col6 BETWEEN 100 AND 200) or (((_col24 = 'MT') or (_col24 = 'OR') or (_col24 = 'IN')) and _col6 BETWEEN 150 AND 300) or (((_col24 = 'WI') or (_col24 = 'MO') or (_col24 = 'WV')) and _col6 BETWEEN 50 AND 250)) + Merge Join Operator [MERGEJOIN_204] (rows=22000000 width=1014) + Conds:RS_37._col11=RS_235._col0(Inner),Output:["_col2","_col4","_col6","_col13","_col15","_col16","_col24"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] PartitionCols:_col0 - Select Operator [SEL_233] (rows=72 width=200) + Select Operator [SEL_234] (rows=20000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_232] (rows=72 width=200) - predicate:r_reason_sk is not null - TableScan [TS_15] (rows=72 width=200) - default@reason,reason,Tbl:COMPLETE,Col:NONE,Output:["r_reason_sk","r_reason_desc"] + Filter Operator [FIL_233] (rows=20000000 width=1014) + predicate:((ca_country = 'United States') and ca_address_sk is not null) + TableScan [TS_15] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_37] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_203] (rows=3904336 width=135) - Conds:RS_34._col8=RS_220._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col12","_col13"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] - PartitionCols:_col0 - Select Operator [SEL_219] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_218] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + PartitionCols:_col11 + Merge Join Operator [MERGEJOIN_203] (rows=5856506 width=135) + Conds:RS_34._col12, _col18, _col19=RS_231._col0, _col1, _col2(Inner),Output:["_col2","_col4","_col6","_col11","_col13","_col15","_col16"] + <-Map 14 [SIMPLE_EDGE] vectorized + 
SHUFFLE [RS_231] + PartitionCols:_col0, _col1, _col2 + Select Operator [SEL_230] (rows=465450 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_229] (rows=465450 width=385) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U') and cd_demo_sk is not null) + TableScan [TS_12] (rows=1861800 width=385) + default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_34] - PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_202] (rows=3549397 width=135) - Conds:RS_31._col10=RS_212._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col12","_col13"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] - PartitionCols:_col0 - Select Operator [SEL_211] (rows=4602 width=585) - Output:["_col0"] - Filter Operator [FIL_210] (rows=4602 width=585) - predicate:wp_web_page_sk is not null - TableScan [TS_9] (rows=4602 width=585) - default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col10 - Filter Operator [FIL_30] (rows=3226725 width=135) - predicate:(((_col16) IN ('KY', 'GA', 'NM') and _col14 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col14 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col14 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_201] (rows=19360357 width=135) - Conds:RS_27._col2=RS_231._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14","_col16"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - PartitionCols:_col0 - Select Operator [SEL_230] (rows=10000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_229] (rows=10000000 width=1014) - predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is 
not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + PartitionCols:_col12, _col18, _col19 + Filter Operator [FIL_33] (rows=5324097 width=135) + predicate:(((_col18 = 'D') and (_col19 = 'Primary') and _col5 BETWEEN 50 AND 100) or ((_col18 = 'M') and (_col19 = '4 yr Degree') and _col5 BETWEEN 100 AND 150) or ((_col18 = 'U') and (_col19 = 'Advanced Degree') and _col5 BETWEEN 150 AND 200)) + Merge Join Operator [MERGEJOIN_202] (rows=63889183 width=135) + Conds:RS_30._col10=RS_232._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col11","_col12","_col13","_col15","_col16","_col18","_col19"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_230] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col10 + Merge Join Operator [MERGEJOIN_201] (rows=58081075 width=135) + Conds:RS_27._col1, _col3=RS_228._col0, _col5(Inner),Output:["_col2","_col4","_col5","_col6","_col10","_col11","_col12","_col13","_col15","_col16"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col0, _col5 + Select Operator [SEL_227] (rows=14398467 width=92) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_226] (rows=14398467 width=92) + predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) + TableScan [TS_6] (rows=14398467 width=92) + default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_27] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_200] (rows=17600325 width=135) - 
Conds:RS_209._col0, _col5=RS_228._col1, _col3(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col10","_col12","_col13","_col14"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] - PartitionCols:_col0, _col5 - Select Operator [SEL_208] (rows=14398467 width=92) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_207] (rows=14398467 width=92) - predicate:(wr_item_sk is not null and wr_order_number is not null and wr_reason_sk is not null and wr_refunded_addr_sk is not null and wr_refunded_cdemo_sk is not null and wr_returning_cdemo_sk is not null) - TableScan [TS_0] (rows=14398467 width=92) - default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_refunded_cdemo_sk","wr_refunded_addr_sk","wr_returning_cdemo_sk","wr_reason_sk","wr_order_number","wr_fee","wr_refunded_cash"] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_200] (rows=52800977 width=135) + Conds:RS_225._col0=RS_209._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - PartitionCols:_col1, _col3 - Select Operator [SEL_227] (rows=16000296 width=135) + SHUFFLE [RS_209] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_207] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col0 + Select Operator [SEL_224] (rows=48000888 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_226] (rows=16000296 width=135) - predicate:((ws_net_profit BETWEEN 100 AND 200 or ws_net_profit BETWEEN 150 AND 300 or ws_net_profit BETWEEN 50 AND 250) and (ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or 
ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_32_web_page_wp_web_page_sk_min) AND DynamicValue(RS_32_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_32_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) - TableScan [TS_3] (rows=144002668 width=135) + Filter Operator [FIL_223] (rows=48000888 width=135) + predicate:((ws_sales_price BETWEEN 100 AND 150 or ws_sales_price BETWEEN 50 AND 100 or ws_sales_price BETWEEN 150 AND 200) and (ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_42_web_page_wp_web_page_sk_min) AND DynamicValue(RS_42_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_42_web_page_wp_web_page_sk_bloom_filter))) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null and ws_web_page_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_order_number","ws_quantity","ws_sales_price","ws_net_profit"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_214] + Group By Operator [GBY_213] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - 
SHUFFLE [RS_215] - Group By Operator [GBY_214] (rows=1 width=12) + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_212] + Group By Operator [GBY_211] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_213] (rows=4602 width=585) + Select Operator [SEL_210] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_211] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_225] - Group By Operator [GBY_224] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_208] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] - Group By Operator [GBY_222] (rows=1 width=12) + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=36524 width=1119) + Select Operator [SEL_218] (rows=4602 width=585) Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] + Please refer to the previous Select Operator [SEL_216] diff --git ql/src/test/results/clientpositive/perf/tez/query89.q.out ql/src/test/results/clientpositive/perf/tez/query89.q.out index ee3374ea5c..76a97f969e 100644 --- ql/src/test/results/clientpositive/perf/tez/query89.q.out +++ ql/src/test/results/clientpositive/perf/tez/query89.q.out @@ -85,21 +85,21 @@ Stage-0 Select Operator [SEL_29] (rows=383325119 width=88) Output:["avg_window_0","_col0","_col1","_col2","_col3","_col4","_col5","_col6"] PTF Operator [PTF_28] (rows=383325119 width=88) - Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"_col2 ASC NULLS FIRST, _col0 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col2, _col0, _col4, _col5"}] + Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col1 ASC NULLS FIRST, _col4 ASC NULLS FIRST, _col5 ASC NULLS FIRST","partition by:":"_col3, _col1, _col4, _col5"}] Select Operator [SEL_27] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_113] - PartitionCols:_col2, _col0, _col4, _col5 + PartitionCols:_col3, _col1, _col4, _col5 Group By Operator [GBY_112] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_22] (rows=766650239 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col5, _col6, _col7, _col10, _col12, _col13 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)"],keys:_col6, _col8, _col9, _col10, _col12, _col13 Merge Join Operator [MERGEJOIN_84] (rows=766650239 width=88) - Conds:RS_18._col2=RS_103._col0(Inner),Output:["_col3","_col5","_col6","_col7","_col10","_col12","_col13"] + Conds:RS_18._col2=RS_103._col0(Inner),Output:["_col3","_col6","_col8","_col9","_col10","_col12","_col13"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_103] PartitionCols:_col0 @@ -113,37 +113,37 @@ Stage-0 SHUFFLE [RS_18] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_83] (rows=696954748 width=88) - Conds:RS_15._col0=RS_95._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col7","_col10"] + 
Conds:RS_15._col1=RS_95._col0(Inner),Output:["_col2","_col3","_col6","_col8","_col9","_col10"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_95] PartitionCols:_col0 - Select Operator [SEL_94] (rows=36524 width=1119) - Output:["_col0","_col2"] - Filter Operator [FIL_93] (rows=36524 width=1119) - predicate:((d_year = 2000) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + Select Operator [SEL_94] (rows=462000 width=1436) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_93] (rows=462000 width=1436) + predicate:(((((i_category = 'Home') or (i_category = 'Books') or (i_category = 'Electronics')) and ((i_class = 'wallpaper') or (i_class = 'parenting') or (i_class = 'musical'))) or (((i_category = 'Shoes') or (i_category = 'Jewelry') or (i_category = 'Men')) and ((i_class = 'womens') or (i_class = 'birdal') or (i_class = 'pants')))) and i_item_sk is not null) + TableScan [TS_6] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] - PartitionCols:_col0 + PartitionCols:_col1 Merge Join Operator [MERGEJOIN_82] (rows=633595212 width=88) - Conds:RS_111._col1=RS_87._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6","_col7"] + Conds:RS_111._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col6"] <-Map 8 [SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_86] (rows=231000 width=1436) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_85] (rows=231000 width=1436) - predicate:((((i_category) IN ('Home', 'Books', 'Electronics') and (i_class) IN ('wallpaper', 'parenting', 'musical')) or ((i_category) IN ('Shoes', 'Jewelry', 'Men') and (i_class) IN ('womens', 'birdal', 'pants'))) and ((i_category) IN ('Home', 'Books', 'Electronics') or (i_category) IN ('Shoes', 'Jewelry', 
'Men')) and ((i_class) IN ('wallpaper', 'parenting', 'musical') or (i_class) IN ('womens', 'birdal', 'pants')) and i_item_sk is not null) - TableScan [TS_3] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category"] + Select Operator [SEL_86] (rows=36524 width=1119) + Output:["_col0","_col2"] + Filter Operator [FIL_85] (rows=36524 width=1119) + predicate:((d_year = 2000) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_111] - PartitionCols:_col1 + PartitionCols:_col0 Select Operator [SEL_110] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_109] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_16_item_i_item_sk_min) AND DynamicValue(RS_16_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_16_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_19_store_s_store_sk_min) 
AND DynamicValue(RS_19_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_19_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized @@ -154,7 +154,7 @@ Stage-0 SHUFFLE [RS_98] Group By Operator [GBY_97] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_96] (rows=36524 width=1119) + Select Operator [SEL_96] (rows=462000 width=1436) Output:["_col0"] Please refer to the previous Select Operator [SEL_94] <-Reducer 13 [BROADCAST_EDGE] vectorized @@ -176,7 +176,7 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_90] Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_88] (rows=231000 width=1436) + Select Operator [SEL_88] (rows=36524 width=1119) Output:["_col0"] Please refer to the previous Select Operator [SEL_86] diff --git ql/src/test/results/clientpositive/perf/tez/query91.q.out ql/src/test/results/clientpositive/perf/tez/query91.q.out index a53c7d796d..33c4339227 100644 --- ql/src/test/results/clientpositive/perf/tez/query91.q.out +++ ql/src/test/results/clientpositive/perf/tez/query91.q.out @@ -61,124 +61,122 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 14 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 15 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) +Reducer 12 <- Map 14 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 vectorized + Reducer 7 vectorized File Output Operator [FS_170] Select Operator [SEL_169] (rows=58564004 width=860) Output:["_col0","_col1","_col2","_col3"] - <-Reducer 5 [SIMPLE_EDGE] vectorized + <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_168] Select Operator [SEL_167] (rows=58564004 width=860) Output:["_col0","_col1","_col2","_col4"] Group By Operator [GBY_166] (rows=58564004 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 - <-Reducer 4 [SIMPLE_EDGE] + <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_41] (rows=117128008 width=860) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col8, _col9, _col10, _col18, _col19 + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col12)"],keys:_col5, _col6, _col17, _col18, _col19 Merge Join Operator [MERGEJOIN_144] (rows=117128008 width=860) - Conds:RS_37._col1=RS_38._col2(Inner),Output:["_col3","_col8","_col9","_col10","_col18","_col19"] - 
<-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col2 - Select Operator [SEL_30] (rows=106480005 width=860) - Output:["_col2","_col7","_col8"] - Merge Join Operator [MERGEJOIN_143] (rows=106480005 width=860) - Conds:RS_27._col2=RS_165._col0(Inner),Output:["_col0","_col5","_col6"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_165] - PartitionCols:_col0 - Select Operator [SEL_164] (rows=3600 width=107) - Output:["_col0"] - Filter Operator [FIL_163] (rows=3600 width=107) - predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null) - TableScan [TS_18] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_142] (rows=96800003 width=860) - Conds:RS_24._col3=RS_162._col0(Inner),Output:["_col0","_col2","_col5","_col6"] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] - PartitionCols:_col0 - Select Operator [SEL_161] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_160] (rows=20000000 width=1014) - predicate:((ca_gmt_offset = -7) and ca_address_sk is not null) - TableScan [TS_15] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_141] (rows=88000001 width=860) - Conds:RS_156._col1=RS_159._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] - PartitionCols:_col0 - Select Operator [SEL_158] (rows=930900 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_157] (rows=930900 width=385) - predicate:((((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree'))) and ((cd_education_status = 'Unknown') 
or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'W')) and cd_demo_sk is not null) - TableScan [TS_12] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] - PartitionCols:_col1 - Select Operator [SEL_155] (rows=80000000 width=860) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_154] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) - TableScan [TS_9] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"] - <-Reducer 3 [SIMPLE_EDGE] + Conds:RS_37._col2=RS_165._col0(Inner),Output:["_col5","_col6","_col12","_col17","_col18","_col19"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_165] + PartitionCols:_col0 + Select Operator [SEL_164] (rows=3600 width=107) + Output:["_col0"] + Filter Operator [FIL_163] (rows=3600 width=107) + predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null) + TableScan [TS_25] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"] + <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_37] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_140] (rows=34846646 width=106) - Conds:RS_34._col2=RS_153._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] - PartitionCols:_col0 - Select Operator [SEL_152] (rows=60 width=2045) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_151] (rows=60 width=2045) - predicate:cc_call_center_sk is not null - TableScan [TS_6] (rows=60 width=2045) - 
default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_139] (rows=31678769 width=106) - Conds:RS_147._col0=RS_150._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_143] (rows=106480005 width=860) + Conds:RS_34._col0=RS_35._col1(Inner),Output:["_col2","_col5","_col6","_col12","_col17","_col18","_col19"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_142] (rows=34846646 width=106) + Conds:RS_21._col2=RS_162._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_146] (rows=28798881 width=106) + Select Operator [SEL_161] (rows=60 width=2045) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_145] (rows=28798881 width=106) - predicate:(cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) - TableScan [TS_0] (rows=28798881 width=106) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] + Filter Operator [FIL_160] (rows=60 width=2045) + predicate:cc_call_center_sk is not null + TableScan [TS_15] (rows=60 width=2045) + default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_141] (rows=31678769 width=106) + Conds:RS_156._col0=RS_159._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_156] + PartitionCols:_col0 + 
Select Operator [SEL_155] (rows=28798881 width=106) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_154] (rows=28798881 width=106) + predicate:(cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) + TableScan [TS_9] (rows=28798881 width=106) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_159] + PartitionCols:_col0 + Select Operator [SEL_158] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_157] (rows=18262 width=1119) + predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_140] (rows=96800003 width=860) + Conds:RS_31._col3=RS_153._col0(Inner),Output:["_col0","_col2","_col5","_col6"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_153] PartitionCols:_col0 - Select Operator [SEL_149] (rows=18262 width=1119) + Select Operator [SEL_152] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_148] (rows=18262 width=1119) - predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + Filter Operator [FIL_151] (rows=20000000 width=1014) + predicate:((ca_gmt_offset = -7) and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_139] (rows=88000001 width=860) + 
Conds:RS_147._col1=RS_150._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_147] + PartitionCols:_col1 + Select Operator [SEL_146] (rows=80000000 width=860) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_145] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_150] + PartitionCols:_col0 + Select Operator [SEL_149] (rows=116363 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_148] (rows=116363 width=385) + predicate:((cd_education_status) IN ('Unknown', 'Advanced Degree') and (cd_marital_status) IN ('M', 'W') and (struct(cd_marital_status,cd_education_status)) IN (const struct('M','Unknown'), const struct('W','Advanced Degree')) and cd_demo_sk is not null) + TableScan [TS_3] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] diff --git ql/src/test/results/clientpositive/ppd_transform.q.out ql/src/test/results/clientpositive/ppd_transform.q.out index b38088f16a..659b2f88d0 100644 --- ql/src/test/results/clientpositive/ppd_transform.q.out +++ ql/src/test/results/clientpositive/ppd_transform.q.out @@ -382,21 +382,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('a', 'b') (type: boolean) + Statistics: Num rows: 250 Data 
size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('c', 'd') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/remove_exprs_stats.q.out ql/src/test/results/clientpositive/remove_exprs_stats.q.out index a9c0051371..712fb37637 100644 --- ql/src/test/results/clientpositive/remove_exprs_stats.q.out +++ ql/src/test/results/clientpositive/remove_exprs_stats.q.out @@ -460,13 +460,13 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc_n0 - filterExpr: (locid) IN (5) (type: boolean) + filterExpr: (locid = 5) (type: boolean) Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (locid) IN (5) (type: boolean) + predicate: (locid = 5) (type: boolean) Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + expressions: state (type: string), 5 (type: int), zip 
(type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git ql/src/test/results/clientpositive/spark/auto_join19.q.out ql/src/test/results/clientpositive/spark/auto_join19.q.out index d7d8caee33..07bee8553c 100644 --- ql/src/test/results/clientpositive/spark/auto_join19.q.out +++ ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -56,7 +56,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: (((ds = '2008-04-08') or (ds = '2008-04-09')) and ((hr = '12') or (hr = '11')) and key is not null) (type: boolean) + filterExpr: ((ds) IN ('2008-04-08', '2008-04-09') and (hr) IN ('12', '11') and key is not null) (type: boolean) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out index e07904ac44..62e5498376 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out @@ -82,15 +82,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean) + filterExpr: ((key) IN (0, 5) and (ds = '1')) (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -98,26 +98,26 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col4 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -236,15 +236,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table1_n20 - filterExpr: (((key = 0) or (key = 5)) and key is not null) (type: boolean) + filterExpr: ((key) IN (0, 5) and (ds = '1')) (type: boolean) Statistics: 
Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + predicate: (key) IN (0, 5) (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -252,26 +252,26 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 809 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -381,57 +381,86 @@ ON a.key = b.key WHERE a.key = 0 or a.key = 5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 Spark - Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1_n20 - filterExpr: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) + filterExpr: ((key) IN (0, 5) and (key < 8)) (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((key = 0) or (key = 5)) and (key < 8)) (type: boolean) - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: test_table2_n19 + filterExpr: ((key) IN (0, 5) and (key < 8)) (type: boolean) + Statistics: Num rows: 84 Data size: 736 Basic stats: 
COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and (key) IN (0, 5)) (type: boolean) + Statistics: Num rows: 14 Data size: 122 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 122 Basic stats: COMPLETE Column stats: NONE + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + input vertices: + 0 Map 1 + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Reducer 2 + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out index a35edb42a8..e61300b5c8 100644 --- ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out +++ ql/src/test/results/clientpositive/spark/cbo_simple_select.q.out @@ -851,9 +851,9 @@ STAGE PLANS: Processor Tree: TableScan alias: cbo_t2 - filterExpr: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + filterExpr: ((c_int = c_int) or (c_int = (2 * c_int))) (type: boolean) Filter Operator - predicate: (c_int) IN (c_int, (2 * c_int)) (type: boolean) + predicate: ((c_int = (2 * c_int)) or (c_int = c_int)) (type: boolean) Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -875,9 +875,9 @@ STAGE PLANS: Processor Tree: TableScan alias: cbo_t2 - filterExpr: (c_int) IN (c_int, 0) (type: boolean) + filterExpr: ((c_int = c_int) or (c_int = 0)) (type: boolean) Filter Operator - predicate: (c_int) IN (c_int, 0) (type: boolean) + predicate: ((c_int = 0) or (c_int = c_int)) (type: boolean) Select Operator expressions: key (type: string), value (type: string), c_int (type: int), c_float (type: float), c_boolean (type: boolean), dt (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 diff --git ql/src/test/results/clientpositive/spark/pcr.q.out ql/src/test/results/clientpositive/spark/pcr.q.out index 83437e5593..ca0b222f62 100644 --- ql/src/test/results/clientpositive/spark/pcr.q.out +++ ql/src/test/results/clientpositive/spark/pcr.q.out @@ -1502,11 +1502,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-0 PREHOOK: type: QUERY POSTHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-04-08' or ds='2000-04-09') and key=14 order by key, 
value POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT CAST(14 AS INTEGER) AS `key`, `value` -FROM (SELECT `value` -FROM `default`.`pcr_t1` -WHERE (`ds` = '2000-04-08' OR `ds` = '2000-04-09') AND `key` = 14 -ORDER BY `value`) AS `t1` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1522,7 +1517,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcr_t1 - filterExpr: (((ds = '2000-04-08') or (ds = '2000-04-09')) and (key = 14)) (type: boolean) + filterExpr: ((ds) IN ('2000-04-08', '2000-04-09') and (key = 14)) (type: boolean) Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator @@ -1696,10 +1691,6 @@ PREHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04 PREHOOK: type: QUERY POSTHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04-08' or ds='2000-04-09' order by key, value POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, `value` -FROM `default`.`pcr_t1` -WHERE `ds` = '2000-04-08' OR `ds` = '2000-04-09' -ORDER BY `key`, `value` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1715,7 +1706,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcr_t1 - filterExpr: ((ds = '2000-04-08') or (ds = '2000-04-09')) (type: boolean) + filterExpr: (ds) IN ('2000-04-08', '2000-04-09') (type: boolean) Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator @@ -2225,10 +2216,6 @@ PREHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='20 PREHOOK: type: QUERY POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, `value`, `ds` -FROM `default`.`pcr_t1` -WHERE `ds` = '2000-04-08' AND `key` = 1 OR `ds` = '2000-04-09' AND `key` = 2 -ORDER BY `key`, `value`, `ds` STAGE 
DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -2244,22 +2231,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pcr_t1 - filterExpr: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) + filterExpr: ((struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) and (struct(ds)) IN (struct('2000-04-08'), struct('2000-04-09'))) (type: boolean) Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean) - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + predicate: (struct(key,ds)) IN (const struct(1,'2000-04-08'), const struct(2,'2000-04-09')) (type: boolean) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) null sort order: aaa sort order: +++ - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE tag: -1 auto parallelism: false Execution mode: vectorized @@ -2372,13 +2359,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE File Output 
Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 40 Data size: 320 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4289,11 +4276,6 @@ PREHOOK: query: explain extended select key, value, ds, hr from srcpart where ds PREHOOK: type: QUERY POSTHOOK: query: explain extended select key, value, ds, hr from srcpart where ds='2008-04-08' and (hr='11' or hr='12') and key=11 order by key, ds, hr POSTHOOK: type: QUERY -OPTIMIZED SQL: SELECT `key`, `value`, CAST('2008-04-08' AS STRING) AS `ds`, `hr` -FROM (SELECT `key`, `value`, `hr` -FROM `default`.`srcpart` -WHERE `ds` = '2008-04-08' AND (`hr` = '11' OR `hr` = '12') AND `key` = 11 -ORDER BY `key`, `hr`) AS `t1` STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -4309,7 +4291,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart - filterExpr: ((ds = '2008-04-08') and ((hr = '11') or (hr = '12')) and (UDFToDouble(key) = 11.0D)) (type: boolean) + filterExpr: ((hr) IN ('11', '12') and (ds = '2008-04-08') and (UDFToDouble(key) = 11.0D)) (type: boolean) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator diff --git ql/src/test/results/clientpositive/spark/ppd_transform.q.out ql/src/test/results/clientpositive/spark/ppd_transform.q.out index 4dfc0fed6e..b6a81a1aeb 100644 --- ql/src/test/results/clientpositive/spark/ppd_transform.q.out +++ ql/src/test/results/clientpositive/spark/ppd_transform.q.out @@ -399,21 +399,21 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col0 = 'a') or (_col0 = 'b')) (type: boolean) - Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('a', 'b') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Filter Operator - predicate: ((_col0 = 'c') or (_col0 = 'd')) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0) IN ('c', 'd') (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out index 24202522f5..9ad432ee82 100644 --- ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning.q.out @@ -6354,15 +6354,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: srcpart_date_hour_n1 - filterExpr: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) + filterExpr: ((date) IN ('2008-04-08', '2008-04-09') and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: 
boolean) Statistics: Num rows: 4 Data size: 108 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((date = '2008-04-08') or (date = '2008-04-09')) and (UDFToDouble(hour) = 11.0D) and ds is not null and hr is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + predicate: ((UDFToDouble(hour) = 11.0D) and (date) IN ('2008-04-08', '2008-04-09') and ds is not null and hr is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ds (type: string), hr (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string), UDFToDouble(_col1) (type: double) @@ -6370,27 +6370,27 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator Target Columns: [Map 1 -> [ds:string (ds)]] - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToDouble(_col2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: double) mode: hash outputColumnNames: 
_col0 - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator Target Columns: [Map 1 -> [hr:int (UDFToDouble(hr))]] - Statistics: Num rows: 2 Data size: 54 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index 736321b369..00f5d7ef11 100644 --- ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -1598,7 +1598,7 @@ Stage-0 Select Operator [SEL_2] Output:["_col0"] Filter Operator [FIL_4] - predicate:((c_int = -6) or (c_int = 6)) + predicate:(c_int) IN (-6, 6) TableScan [TS_0] Output:["key","c_int"] diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 8b1a2be89b..109b416d8a 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -141,7 +141,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - filterExpr: (not (cdate) IN (DATE'1969-10-26', DATE'1969-07-14', DATE'1970-01-21')) (type: boolean) + filterExpr: ((cdate <> DATE'1969-10-26') and (cdate <> DATE'1969-07-14') and (cdate <> DATE'1970-01-21')) (type: boolean) Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -149,15 +149,15 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: LongColumnInList(col 3, values [-67, -171, 20]) -> 5:boolean) - predicate: (not (cdate) 
IN (DATE'1969-10-26', DATE'1969-07-14', DATE'1970-01-21')) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: FilterDateColNotEqualDateScalar(col 3:date, val -67), FilterDateColNotEqualDateScalar(col 3:date, val -171), FilterDateColNotEqualDateScalar(col 3:date, val 20)) + predicate: ((cdate <> DATE'1969-07-14') and (cdate <> DATE'1969-10-26') and (cdate <> DATE'1970-01-21')) (type: boolean) + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Select Operator Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [] - Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -343,7 +343,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_date_test - filterExpr: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) + filterExpr: ((cdecimal1 <> 2365.8945945946) and (cdecimal1 <> 881.0135135135) and (cdecimal1 <> -3367.6517567568)) (type: boolean) Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -351,15 +351,15 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsFalse(col 5:boolean)(children: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean) - predicate: (not (cdecimal1) IN (2365.8945945946, 881.0135135135, -3367.6517567568)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 
2365.8945945946), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135), FilterDecimalColNotEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568)) + predicate: ((cdecimal1 <> -3367.6517567568) and (cdecimal1 <> 2365.8945945946) and (cdecimal1 <> 881.0135135135)) (type: boolean) + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Select Operator Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [] - Statistics: Num rows: 6144 Data size: 1233707 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() Group By Vectorization: @@ -1091,13 +1091,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: (cdate) IN (DATE'1969-10-26', DATE'1969-07-14') (type: boolean) + expressions: ((cdate = DATE'1969-10-26') or (cdate = DATE'1969-07-14')) (type: boolean) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: LongColumnInList(col 3, values [-67, -171]) -> 5:boolean + projectedOutputColumnNums: [7] + selectExpressions: ColOrCol(col 5:boolean, col 6:boolean)(children: DateColEqualDateScalar(col 3:date, date 1969-10-26) -> 5:boolean, DateColEqualDateScalar(col 3:date, date 1969-07-14) -> 6:boolean) -> 7:boolean Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1105,7 +1105,7 @@ STAGE PLANS: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:boolean + keyExpressions: col 7:boolean native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -1227,13 +1227,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: (cdecimal1) IN 
(2365.8945945946, 881.0135135135, -3367.6517567568) (type: boolean) + expressions: ((cdecimal1 = 2365.8945945946) or (cdecimal1 = 881.0135135135) or (cdecimal1 = -3367.6517567568)) (type: boolean) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5] - selectExpressions: DecimalColumnInList(col 1:decimal(20,10), values [2365.8945945946, 881.0135135135, -3367.6517567568]) -> 5:boolean + projectedOutputColumnNums: [8] + selectExpressions: VectorUDFAdaptor(((cdecimal1 = 2365.8945945946) or (cdecimal1 = 881.0135135135) or (cdecimal1 = -3367.6517567568)))(children: DecimalColEqualDecimalScalar(col 1:decimal(20,10), val 2365.8945945946) -> 5:boolean, DecimalColEqualDecimalScalar(col 1:decimal(20,10), val 881.0135135135) -> 6:boolean, DecimalColEqualDecimalScalar(col 1:decimal(20,10), val -3367.6517567568) -> 7:boolean) -> 8:boolean Statistics: Num rows: 12289 Data size: 2467616 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1241,7 +1241,7 @@ STAGE PLANS: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:boolean + keyExpressions: col 8:boolean native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -1267,7 +1267,7 @@ STAGE PLANS: featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false - usesVectorUDFAdaptor: false + usesVectorUDFAdaptor: true vectorized: true Reducer 2 Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 0bf2a4bfa5..611e220e54 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -51,7 +51,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((csmallint = 418S) or 
(csmallint = 12205S) or (csmallint = 10583S)) (type: boolean) + filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -60,9 +60,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583)) - predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583]) + predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -71,13 +71,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 17, 21] selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 
19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -200,7 +200,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean) + filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -209,9 +209,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583)) - predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583]) + predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN 
('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -220,13 +220,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 18, 24] selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out index 5a50431d26..e9d99c5feb 100644 --- ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out +++ ql/src/test/results/clientpositive/tez/explainanalyze_5.q.out @@ -303,7 +303,7 @@ 
Stage-3 Select Operator [SEL_2] (rows=2/2 width=302) Output:["_col0","_col1","_col3"] Filter Operator [FIL_9] (rows=2/2 width=226) - predicate:((de = 109.23) or (de = 119.23)) + predicate:(de) IN (109.23, 119.23) TableScan [TS_0] (rows=4/4 width=226) default@acid_uami_n2,acid_uami_n2, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["i","de","vc"] diff --git ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out index 966edad025..7ddfda1bc6 100644 --- ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out +++ ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out @@ -16,28 +16,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (cint) IN (UDFToInteger(ctinyint), UDFToInteger(cbigint)) (type: boolean) + filterExpr: ((cint = UDFToInteger(ctinyint)) or (cint = UDFToInteger(cbigint))) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cint) IN (UDFToInteger(ctinyint), UDFToInteger(cbigint)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE + predicate: ((cint = UDFToInteger(cbigint)) or (cint = UDFToInteger(ctinyint))) (type: boolean) + Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2907994 Basic 
stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: FILTER operator: Vectorizing IN expression only supported for constant values - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index 828131f8c6..90fccafa83 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -48,7 +48,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean) + filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -57,9 +57,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583)) - predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 
418S)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583]) + predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -68,13 +68,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 17, 21] selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 16:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprStringScalarStringScalar(col 15:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean) -> 16:string) -> 17:string, IfExprColumnCondExpr(col 15:boolean, col 18:stringcol 20:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 15:boolean, ConstantVectorExpression(val a) -> 18:string, IfExprStringScalarStringScalar(col 19:boolean, val b, val c)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 19:boolean) -> 20:string) -> 21:string - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -194,7 +194,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((csmallint = 418S) or (csmallint = 12205S) or (csmallint = 10583S)) (type: boolean) + filterExpr: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -203,9 +203,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:smallint, val 418), FilterLongColEqualLongScalar(col 1:smallint, val 12205), FilterLongColEqualLongScalar(col 1:smallint, val 10583)) - predicate: ((csmallint = 10583S) or (csmallint = 12205S) or (csmallint = 418S)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterLongColumnInList(col 1:smallint, values [418, 12205, 10583]) + predicate: (csmallint) IN (418S, 12205S, 10583S) (type: boolean) + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418S)) THEN ('a') WHEN ((csmallint = 12205S)) THEN (null) ELSE ('c') END (type: string) outputColumnNames: _col0, _col1, _col2 @@ -214,13 +214,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 18, 24] selectExpressions: IfExprColumnCondExpr(col 13:boolean, col 14:stringcol 17:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 13:boolean, ConstantVectorExpression(val a) -> 14:string, IfExprColumnNull(col 15:boolean, col 16:string, null)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 15:boolean, 
ConstantVectorExpression(val b) -> 16:string) -> 17:string) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 23:string)(children: LongColEqualLongScalar(col 1:smallint, val 418) -> 19:boolean, ConstantVectorExpression(val a) -> 20:string, IfExprNullColumn(col 21:boolean, null, col 22)(children: LongColEqualLongScalar(col 1:smallint, val 12205) -> 21:boolean, ConstantVectorExpression(val c) -> 22:string) -> 23:string) -> 24:string - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2907994 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6144 Data size: 1453997 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat