diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index 83053d1..554df3c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -93,7 +93,7 @@ INVALID_MAPINDEX_CONSTANT(10031, "Non-constant expression for map indexes not supported"), INVALID_MAPINDEX_TYPE(10032, "MAP key type does not match index expression type"), NON_COLLECTION_TYPE(10033, "[] not valid on non-collection types"), - SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"), + @Deprecated SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"), COLUMN_REPEATED_IN_PARTITIONING_COLS(10035, "Column repeated in partitioning columns"), DUPLICATE_COLUMN_NAMES(10036, "Duplicate column name:"), INVALID_BUCKET_NUMBER(10037, "Bucket number should be bigger than zero"), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f5a1c74..3d53706 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -302,6 +302,8 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import java.util.stream.IntStream; import javax.sql.DataSource; @@ -863,7 +865,7 @@ public Object post(Object t) { // Now check QB in more detail. canHandleQbForCbo returns null if query can // be handled. - msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage, qb); + msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage); if (msg == null) { return Pair.of(true, msg); } @@ -891,8 +893,8 @@ public Object post(Object t) { * Query
* 2. Nested Subquery will return false for qbToChk.getIsQuery() */ - static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, - boolean topLevelQB, boolean verbose, QB qb) { + private static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, + boolean topLevelQB, boolean verbose) { if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript() @@ -2508,8 +2510,7 @@ private RelNode genSetOpLogicalPlan(Opcode opcode, String alias, String leftalia throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg(opcode.toString())); } relToHiveRR.put(setOpRel, setOpOutRR); - relToHiveColNameCalcitePosMap.put(setOpRel, - this.buildHiveToCalciteColumnMap(setOpOutRR, setOpRel)); + relToHiveColNameCalcitePosMap.put(setOpRel, buildHiveToCalciteColumnMap(setOpOutRR)); return setOpRel; } @@ -2647,7 +2648,7 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r if (!RowResolver.add(joinRR, newLeftRR)) { LOG.warn("Duplicates detected when adding columns to RR: see previous message"); } - relToHiveColNameCalcitePosMap.put(topRel, this.buildHiveToCalciteColumnMap(joinRR, topRel)); + relToHiveColNameCalcitePosMap.put(topRel, buildHiveToCalciteColumnMap(joinRR)); relToHiveRR.put(topRel, joinRR); // Introduce top project operator to remove additional column(s) that have @@ -2678,7 +2679,7 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r } // 4. Add new rel & its RR to the maps - relToHiveColNameCalcitePosMap.put(topRel, this.buildHiveToCalciteColumnMap(topRR, topRel)); + relToHiveColNameCalcitePosMap.put(topRel, buildHiveToCalciteColumnMap(topRR)); relToHiveRR.put(topRel, topRR); return topRel; } @@ -2731,15 +2732,14 @@ private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map a if ((left.getToken().getType() == HiveParser.TOK_TABREF) || (left.getToken().getType() == HiveParser.TOK_SUBQUERY) || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { - String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName( - (ASTNode) left.getChild(0)).toLowerCase(); - leftTableAlias = left.getChildCount() == 1 ? tableName : SemanticAnalyzer - .unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase()); + String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0)).toLowerCase(); + leftTableAlias = left.getChildCount() == 1 ? tableName : + unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase()); // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? // partitionTableFunctionSource partitioningSpec? expression*) // guaranteed to have an alias here: check done in processJoin - leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer - .unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias; + leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? 
+ unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias; leftRel = aliasToRel.get(leftTableAlias); } else if (SemanticAnalyzer.isJoinToken(left)) { leftRel = genJoinLogicalPlan(left, aliasToRel); @@ -2755,15 +2755,14 @@ private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map a if ((right.getToken().getType() == HiveParser.TOK_TABREF) || (right.getToken().getType() == HiveParser.TOK_SUBQUERY) || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { - String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName( - (ASTNode) right.getChild(0)).toLowerCase(); - rightTableAlias = right.getChildCount() == 1 ? tableName : SemanticAnalyzer - .unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase()); + String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0)).toLowerCase(); + rightTableAlias = right.getChildCount() == 1 ? tableName : + unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase()); // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? // partitionTableFunctionSource partitioningSpec? expression*) // guaranteed to have an alias here: check done in processJoin - rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer - .unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias; + rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? + unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias; rightRel = aliasToRel.get(rightTableAlias); } else if (right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) { rightRel = genLateralViewPlans(right, aliasToRel); @@ -2804,7 +2803,6 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc // Virtual Cols // 3.1 Add Column info for non-partition cols (Object Inspector fields) - @SuppressWarnings("deprecation") StructObjectInspector rowObjectInspector = (StructObjectInspector) tabMetaData.getDeserializer() .getObjectInspector(); List fields = rowObjectInspector.getAllStructFieldRefs(); @@ -2981,8 +2979,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc } // 6. Add Schema(RR) to RelNode-Schema map - ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr, - tableRel); + ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr); relToHiveRR.put(tableRel, rr); relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap); } catch (Exception e) { @@ -3169,17 +3166,16 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al case HiveParser.TOK_TABREF: case HiveParser.TOK_SUBQUERY: case HiveParser.TOK_PTBLFUNCTION: - String inputTableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName( - (ASTNode) next.getChild(0)).toLowerCase(); + String inputTableName = getUnescapedUnqualifiedTableName((ASTNode) next.getChild(0)).toLowerCase(); String inputTableAlias; if (next.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) { // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias? // partitionTableFunctionSource partitioningSpec? expression*) // ptf node guaranteed to have an alias here - inputTableAlias = SemanticAnalyzer.unescapeIdentifier(next.getChild(1).getText().toLowerCase()); + inputTableAlias = unescapeIdentifier(next.getChild(1).getText().toLowerCase()); } else { inputTableAlias = next.getChildCount() == 1 ? 
inputTableName : - SemanticAnalyzer.unescapeIdentifier(next.getChild(next.getChildCount() - 1).getText().toLowerCase()); + unescapeIdentifier(next.getChild(next.getChildCount() - 1).getText().toLowerCase()); } inputRel = aliasToRel.get(inputTableAlias); break; @@ -3216,8 +3212,6 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al (ASTNode) functionCall.getChild(1); // Output types. They will be the concatenation of the input refs types and // the types of the expressions for the lateral view generated rows - List outputFieldTypes = new ArrayList<>(inputRefsTypes); - List outputFieldNames = new ArrayList<>(inputRel.getRowType().getFieldNames()); // Generate all expressions from lateral view ExprNodeDesc valuesExpr = genExprNodeDesc(valuesClause, inputRR, false); RexCall convertedOriginalValuesExpr = (RexCall) new RexNodeConverter(this.cluster, inputRel.getRowType(), @@ -3289,7 +3283,6 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al columnAliases.add(SemanticAnalyzer.getColumnInternalName(i)); } } - int numInputExprs = inputRR.getColumnInfos().size(); ListTypeInfo listTypeInfo = (ListTypeInfo) valuesExpr.getTypeInfo(); // Array should have ListTypeInfo StructTypeInfo typeInfos = (StructTypeInfo) listTypeInfo.getListElementTypeInfo(); // Within the list, we extract types for (int i = 0, j = 0; i < columnAliases.size(); i++) { @@ -3301,8 +3294,7 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al new ColumnInfo(internalColName, typeInfos.getAllStructFieldTypeInfos().get(i), tableAlias, false)); } - this.relToHiveColNameCalcitePosMap - .put(htfsRel, buildHiveToCalciteColumnMap(outputRR, htfsRel)); + this.relToHiveColNameCalcitePosMap.put(htfsRel, buildHiveToCalciteColumnMap(outputRR)); this.relToHiveRR.put(htfsRel, outputRR); // 4) Return new operator @@ -3374,8 +3366,8 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean } private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, - Map aliasToRel, ImmutableMap outerNameToPosMap, - RowResolver outerRR, boolean forHavingClause) throws SemanticException { + ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean forHavingClause) + throws SemanticException { Map subQueryToRelNode = new HashMap<>(); boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause, @@ -3402,39 +3394,15 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, } } - private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException { - RowResolver iRR = relToHiveRR.get(srcRel); - RowResolver oRR = new RowResolver(); - RowResolver.add(oRR, iRR, numColumns); - - List calciteColLst = new ArrayList(); - List oFieldNames = new ArrayList(); - RelDataType iType = srcRel.getRowType(); - - for (int i = 0; i < iType.getFieldCount(); i++) { - RelDataTypeField fType = iType.getFieldList().get(i); - String fName = iType.getFieldNames().get(i); - calciteColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i)); - oFieldNames.add(fName); - } - - HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames); - - this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(oRR, selRel)); - this.relToHiveRR.put(selRel, oRR); - return selRel; - } - - private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel, - ImmutableMap outerNameToPosMap, RowResolver outerRR, - boolean forHavingClause) throws SemanticException { + private RelNode genFilterLogicalPlan(QB qb, RelNode 
srcRel, ImmutableMap outerNameToPosMap, + RowResolver outerRR, boolean forHavingClause) throws SemanticException { RelNode filterRel = null; Iterator whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values() .iterator(); if (whereClauseIterator.hasNext()) { filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel, - aliasToRel, outerNameToPosMap, outerRR, forHavingClause); + outerNameToPosMap, outerRR, forHavingClause); } return filterRel; @@ -3458,9 +3426,8 @@ private AggInfo(List aggParams, TypeInfo returnType, String udfNam } } - private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List gbChildProjLst, - RexNodeConverter converter, HashMap rexNodeToPosMap, - Integer childProjLstIndx) throws SemanticException { + private AggregateCall convertGBAgg(AggInfo agg, List gbChildProjLst, RexNodeConverter converter, + HashMap rexNodeToPosMap, Integer childProjLstIndx) throws SemanticException { // 1. Get agg fn ret type in Calcite RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType, @@ -3534,7 +3501,7 @@ private RelNode genGBRelNode(List gbExprs, List aggInfoLs List aggregateCalls = Lists.newArrayList(); for (AggInfo agg : aggInfoLst) { - aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap, + aggregateCalls.add(convertGBAgg(agg, gbChildProjLst, converter, rexNodeToPosMap, gbChildProjLst.size())); } if (hasGroupSets) { @@ -3579,12 +3546,12 @@ private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, RowResolver gByInputRR, RowResolver gByRR) { if (gByExpr.getType() == HiveParser.DOT && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) { - String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0) + String tab_alias = unescapeIdentifier(gByExpr.getChild(0).getChild(0) .getText().toLowerCase()); - String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText().toLowerCase()); + String col_alias = unescapeIdentifier(gByExpr.getChild(1).getText().toLowerCase()); gByRR.put(tab_alias, col_alias, colInfo); } else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) { - String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText().toLowerCase()); + String col_alias = unescapeIdentifier(gByExpr.getChild(0).getText().toLowerCase()); String tab_alias = null; /* * If the input to the GBy has a tab alias for the column, then add an @@ -3638,7 +3605,7 @@ private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver TypeInfo udafRetType = null; // 3.1 Obtain UDAF name - String aggName = SemanticAnalyzer.unescapeIdentifier(aggAst.getChild(0).getText()); + String aggName = unescapeIdentifier(aggAst.getChild(0).getText()); // 3.2 Rank functions type is 'int'/'double' if (FunctionRegistry.isRankingFunction(aggName)) { @@ -3815,7 +3782,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException for (ASTNode value : aggregationTrees.values()) { // 6.1 Determine type of UDAF // This is the GenericUDAF name - String aggName = SemanticAnalyzer.unescapeIdentifier(value.getChild(0).getText()); + String aggName = unescapeIdentifier(value.getChild(0).getText()); boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; @@ -3857,8 +3824,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException // 8. 
We create the group_by operator gbRel = genGBRelNode(gbExprNDescLst, aggregations, groupingSets, srcRel); - relToHiveColNameCalcitePosMap.put(gbRel, - buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel)); + relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver)); this.relToHiveRR.put(gbRel, groupByOutputRowResolver); } @@ -4065,8 +4031,7 @@ public RexNode apply(RelDataTypeField input) { // rowtype of sortrel is the type of it child; if child happens to be // synthetic project that we introduced then that projectrel would // contain the vc. - ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap( - outputRR, sortRel); + ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR); relToHiveRR.put(sortRel, outputRR); relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); @@ -4108,8 +4073,7 @@ private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticExcept RowResolver inputRR = relToHiveRR.get(srcRel); RowResolver outputRR = inputRR.duplicate(); - ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap( - outputRR, sortRel); + ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR); relToHiveRR.put(sortRel, outputRR); relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap); } @@ -4162,7 +4126,7 @@ private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticExcept return oKeys; } - private RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) { + private RexWindowBound getBound(BoundarySpec bs) { RexWindowBound rwb = null; if (bs != null) { @@ -4216,8 +4180,8 @@ private int getWindowSpecIndx(ASTNode wndAST) { return wi; } - private Pair genWindowingProj(QB qb, WindowExpressionSpec wExpSpec, - RelNode srcRel) throws SemanticException { + private Pair genWindowingProj(WindowExpressionSpec wExpSpec, RelNode srcRel) + throws SemanticException { RexNode w = null; TypeInfo wHiveRetType = null; @@ -4258,8 +4222,8 @@ private int getWindowSpecIndx(ASTNode wndAST) { WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec(); List partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR); List orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR); - RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getStart(), converter); - RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getEnd(), converter); + RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getStart()); + RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getEnd()); boolean isRows = wndSpec.getWindowFrame().getWindowType() == WindowType.ROWS; w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs, @@ -4311,7 +4275,7 @@ private RelNode genSelectForWindowing(QB qb, RelNode srcRel, HashSet !cubeRollupGrpSetPresent)); } if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) { - Pair wtp = genWindowingProj(qb, wExprSpec, srcRel); + Pair wtp = genWindowingProj(wExprSpec, srcRel); projsForWindowSelOp.add(wtp.getKey()); // 6.2.2 Update Output Row Schema @@ -4384,8 +4348,7 @@ private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rw HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames); // 4. 
Keep track of colname-to-posmap && RR for new select - this.relToHiveColNameCalcitePosMap - .put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel)); + this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch)); this.relToHiveRR.put(selRel, out_rwsch); return selRel; @@ -4409,26 +4372,31 @@ private void setQueryHints(QB qb) throws SemanticException { } } + private Pair genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel, + ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite) + throws SemanticException { + Pair retNodeRR = internalGenSelectLogicalPlan(qb, srcRel, starSrcRel, outerNameToPosMap, + outerRR, isAllColRefRewrite); + + QBParseInfo qbp = getQBParseInfo(qb); + String selClauseName = qbp.getClauseNames().iterator().next(); + ASTNode selExprList = qbp.getSelForClause(selClauseName); + if (isSelectDistinct(selExprList) && hasGroupBySibling(selExprList)) { + retNodeRR = genGBSelectDistinctPlan(retNodeRR); + } + + return retNodeRR; + } + /** - * NOTE: there can only be one select caluse since we don't handle multi - destination insert. - * - * @throws SemanticException - */ /** - * @param qb - * @param srcRel - * @param starSrcRel - * @param outerNameToPosMap - * @param outerRR + * NOTE: there can only be one select clause since we don't handle multi-destination insert. * @param isAllColRefRewrite * when it is true, it means that it is called from group by *, where we use * genSelectLogicalPlan to rewrite * * @return RelNode: the select relnode RowResolver: i.e., originalRR, the RR after select when there is an order by. - * @throws SemanticException */ - private Pair genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel, - ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite) + private Pair internalGenSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel, + ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite) throws SemanticException { // 0. Generate a Select Node for Windowing // Exclude the newly-generated select columns from */etc. resolution. @@ -4602,36 +4570,35 @@ private void setQueryHints(QB qb) throws SemanticException { // 6.4 Build ExprNode corresponding to columns if (expr.getType() == HiveParser.TOK_ALLCOLREF) { - pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : SemanticAnalyzer - .getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, + pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : + getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(), true); selectStar = true; } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause && !inputRR.getIsExprResolver() && SemanticAnalyzer.isRegex( - SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf)) { + unescapeIdentifier(expr.getChild(0).getText()), conf)) { // In case the expression is a regex COL. 
// This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case - pos = genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), - null, expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, - qb.getAliases(), true); + pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, col_list, excludedColumns, + inputRR, starRR, pos, out_rwsch, qb.getAliases(), true); } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL - && inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0) + && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0) .getChild(0).getText().toLowerCase())) && !hasAsClause && !inputRR.getIsExprResolver() && SemanticAnalyzer.isRegex( - SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf)) { + unescapeIdentifier(expr.getChild(1).getText()), conf)) { // In case the expression is TABLE.COL (col can be regex). // This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case pos = genColListRegex( - SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), - SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText() - .toLowerCase()), expr, col_list, excludedColumns, inputRR, starRR, pos, + unescapeIdentifier(expr.getChild(1).getText()), + unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()), + expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(), true); } else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI) && !(srcRel instanceof HiveAggregate)) { @@ -4697,7 +4664,13 @@ private void setQueryHints(QB qb) throws SemanticException { // TODO: support unselected columns in genericUDTF and windowing functions. // We examine the order by in this query block and adds in column needed // by order by in select list. - if (obAST != null && !(selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI) && !isAllColRefRewrite) { + // + // If DISTINCT is present, it is not possible to ORDER BY unselected + // columns, and in fact adding all columns would change the behavior of + // DISTINCT, so we bypass this logic. + if (obAST != null + && selExprList.getToken().getType() != HiveParser.TOK_SELECTDI + && !isAllColRefRewrite) { // 1. 
OB Expr sanity test // in strict mode, in the presence of order by, limit must be // specified @@ -4750,8 +4723,7 @@ public RexNode apply(RelDataTypeField input) { colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol()); groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo); } - relToHiveColNameCalcitePosMap.put(outputRel, - buildHiveToCalciteColumnMap(groupByOutputRowResolver, outputRel)); + relToHiveColNameCalcitePosMap.put(outputRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver)); this.relToHiveRR.put(outputRel, groupByOutputRowResolver); } @@ -4866,12 +4838,34 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str RelNode udtf = HiveTableFunctionScan.create(cluster, traitSet, list, rexNode, null, retType, null); // Add new rel & its RR to the maps - relToHiveColNameCalcitePosMap.put(udtf, this.buildHiveToCalciteColumnMap(out_rwsch, udtf)); + relToHiveColNameCalcitePosMap.put(udtf, buildHiveToCalciteColumnMap(out_rwsch)); relToHiveRR.put(udtf, out_rwsch); return udtf; } + private Pair genGBSelectDistinctPlan(Pair srcNodeRR) + throws SemanticException { + RelNode srcRel = srcNodeRR.left; + + RelDataType inputRT = srcRel.getRowType(); + List groupSetPositions = + IntStream.range(0, inputRT.getFieldCount()).boxed().collect(Collectors.toList()); + + HiveAggregate distAgg = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), srcRel, + ImmutableBitSet.of(groupSetPositions), null, new ArrayList()); + + // This comes from genSelectLogicalPlan, must be a project + assert srcRel instanceof HiveProject; + RowResolver outputRR = srcNodeRR.right; + if (outputRR == null) { + outputRR = relToHiveRR.get(srcRel); + } + + relToHiveRR.put(distAgg, outputRR); + relToHiveColNameCalcitePosMap.put(distAgg, relToHiveColNameCalcitePosMap.get(srcRel)); + return new Pair(distAgg, outputRR); + } + private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { switch (qbexpr.getOpcode()) { case NULLOP: @@ -4906,7 +4900,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, // 0. Check if we can handle the SubQuery; // canHandleQbForCbo returns null if the query can be handled. - String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled(), qb); + String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled()); if (reason != null) { String msg = "CBO can not handle Sub Query"; if (LOG.isDebugEnabled()) { @@ -4969,7 +4963,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, } // 2. Build Rel for where Clause - filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, outerNameToPosMap, outerRR, false); + filterRel = genFilterLogicalPlan(qb, srcRel, outerNameToPosMap, outerRR, false); srcRel = (filterRel == null) ? srcRel : filterRel; RelNode starSrcRel = srcRel; @@ -4978,7 +4972,7 @@ srcRel = (gbRel == null) ? srcRel : gbRel; // 4. Build Rel for GB Having Clause - gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel); + gbHavingRel = genGBHavingLogicalPlan(qb, srcRel); srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel; // 5. 
Build Rel for Select Clause @@ -5012,7 +5006,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, newRR.putWithCheck(alias, tmp[1], colInfo.getInternalName(), newCi); } relToHiveRR.put(srcRel, newRR); - relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR, srcRel)); + relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR)); } if (LOG.isDebugEnabled()) { @@ -5023,8 +5017,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB, return srcRel; } - private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel) - throws SemanticException { + private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException { RelNode gbFilter = null; QBParseInfo qbp = getQBParseInfo(qb); String destClauseName = qbp.getClauseNames().iterator().next(); @@ -5045,7 +5038,7 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map buildHiveToCalciteColumnMap(RowResolver rr, RelNode rNode) { + private ImmutableMap buildHiveToCalciteColumnMap(RowResolver rr) { ImmutableMap.Builder b = new ImmutableMap.Builder(); for (ColumnInfo ci : rr.getRowSchema().getSignature()) { b.put(ci.getInternalName(), rr.getPosition(ci.getInternalName())); @@ -5136,16 +5129,6 @@ public Object post(Object t) { private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { return qb.getParseInfo(); } - - private List getTabAliases(RowResolver inputRR) { - List tabAliases = new ArrayList(); // TODO: this should be - // unique - for (ColumnInfo ci : inputRR.getColumnInfos()) { - tabAliases.add(ci.getTabAlias()); - } - - return tabAliases; - } } private enum TableType { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 8dc5b34..97db527 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -26,6 +26,7 @@ import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Deque; import java.util.HashMap; import java.util.HashSet; @@ -1727,10 +1728,6 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan if (qbp.getJoinExpr() != null) { queryProperties.setHasJoinFollowedByGroupBy(true); } - if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) { - throw new SemanticException(generateErrorMessage(ast, - ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg())); - } qbp.setGroupByExprForClause(ctx_1.dest, ast); skipRecursion = true; @@ -4194,30 +4191,32 @@ public static long unsetBit(long bitmap, int bitIdx) { } /** - * This function is a wrapper of parseInfo.getGroupByForClause which - * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY - * a,b,c. + * Returns the GBY, if present; + * DISTINCT, if present, will be handled when generating the SELECT. */ List getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException { - if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) { - ASTNode selectExprs = parseInfo.getSelForClause(dest); - List result = new ArrayList(selectExprs == null ? 
0 - : selectExprs.getChildCount()); - if (selectExprs != null) { - for (int i = 0; i < selectExprs.getChildCount(); ++i) { - if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) { - continue; - } - // table.column AS alias - ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0); - result.add(grpbyExpr); + ASTNode selectExpr = parseInfo.getSelForClause(dest); + Collection aggregateFunction = parseInfo.getDestToAggregationExprs().get(dest).values(); + if (!(this instanceof CalcitePlanner) && isSelectDistinct(selectExpr) && hasGroupBySibling(selectExpr)) { + throw new SemanticException("SELECT DISTINCT with GROUP BY is only supported with CBO"); + } + + if (isSelectDistinct(selectExpr) && !hasGroupBySibling(selectExpr) && + !isAggregateInSelect(selectExpr, aggregateFunction)) { + List result = new ArrayList(selectExpr.getChildCount()); + for (int i = 0; i < selectExpr.getChildCount(); ++i) { + if (((ASTNode) selectExpr.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) { + continue; } + // table.column AS alias + ASTNode grpbyExpr = (ASTNode) selectExpr.getChild(i).getChild(0); + result.add(grpbyExpr); } return result; } else { + // look for a true GBY ASTNode grpByExprs = parseInfo.getGroupByForClause(dest); - List result = new ArrayList(grpByExprs == null ? 0 - : grpByExprs.getChildCount()); + List result = new ArrayList(grpByExprs == null ? 0 : grpByExprs.getChildCount()); if (grpByExprs != null) { for (int i = 0; i < grpByExprs.getChildCount(); ++i) { ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i); @@ -4230,6 +4229,35 @@ public static long unsetBit(long bitmap, int bitIdx) { } } + protected boolean hasGroupBySibling(ASTNode selectExpr) { + boolean isGroupBy = false; + if (selectExpr.getParent() != null && selectExpr.getParent() instanceof Node) { + for (Node sibling : ((Node)selectExpr.getParent()).getChildren()) { + isGroupBy |= sibling instanceof ASTNode && ((ASTNode)sibling).getType() == HiveParser.TOK_GROUPBY; + } + } + + return isGroupBy; + } + + protected boolean isSelectDistinct(ASTNode expr) { + return expr.getType() == HiveParser.TOK_SELECTDI; + } + + protected boolean isAggregateInSelect(Node node, Collection aggregateFunction) { + if (node.getChildren() == null) { + return false; + } + + for (Node child : node.getChildren()) { + if (aggregateFunction.contains(child) || isAggregateInSelect(child, aggregateFunction)) { + return true; + } + } + + return false; + } + static String[] getColAlias(ASTNode selExpr, String defaultName, RowResolver inputRR, boolean includeFuncName, int colNum) { String colAlias = null; diff --git a/ql/src/test/queries/clientnegative/distinct_missing_groupby.q b/ql/src/test/queries/clientnegative/distinct_missing_groupby.q new file mode 100644 index 0000000..3411aa2 --- /dev/null +++ b/ql/src/test/queries/clientnegative/distinct_missing_groupby.q @@ -0,0 +1,2 @@ +--! qt:dataset:src +select distinct key, sum(key) from src; diff --git a/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q b/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q deleted file mode 100644 index cf0ac4b..0000000 --- a/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q +++ /dev/null @@ -1,4 +0,0 @@ ---! qt:dataset:src --- SELECT DISTINCT and GROUP BY can not be in the same query. 
Error encountered near token 'key' - -select distinct * from src group by key; \ No newline at end of file diff --git a/ql/src/test/queries/clientnegative/udaf_invalid_place.q b/ql/src/test/queries/clientnegative/udaf_invalid_place.q deleted file mode 100644 index 3411aa2..0000000 --- a/ql/src/test/queries/clientnegative/udaf_invalid_place.q +++ /dev/null @@ -1,2 +0,0 @@ ---! qt:dataset:src -select distinct key, sum(key) from src; diff --git a/ql/src/test/queries/clientnegative/wrong_distinct_group_by_without_cbo.q b/ql/src/test/queries/clientnegative/wrong_distinct_group_by_without_cbo.q new file mode 100644 index 0000000..bb614fe --- /dev/null +++ b/ql/src/test/queries/clientnegative/wrong_distinct_group_by_without_cbo.q @@ -0,0 +1,5 @@ +--! qt:dataset:src + +set hive.cbo.enable=false; + +select distinct key from src group by key diff --git a/ql/src/test/queries/clientpositive/distinct_groupby.q b/ql/src/test/queries/clientpositive/distinct_groupby.q new file mode 100644 index 0000000..2a25816 --- /dev/null +++ b/ql/src/test/queries/clientpositive/distinct_groupby.q @@ -0,0 +1,67 @@ +--! qt:dataset:src +--! qt:dataset:src1 + +explain select distinct key from src1 group by key,value; +select distinct key from src1 group by key,value; + +explain select distinct count(value) from src group by key; +select distinct count(value) from src group by key; + +explain select distinct count(*) from src1 where key in (128,146,150); +select distinct count(*) from src1 where key in (128,146,150); + +explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T; +select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T; + +explain select distinct count(*)+1 from src1; +select distinct count(*)+1 from src1; + +explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key; +select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key; + +explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a; +select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a; + +explain select distinct key from src1; +select distinct key from src1; + +explain select distinct * from src1; +select distinct * from src1; + +explain select distinct count(*) from src1 where key in (128,146,150) group by key; +select distinct count(*) from src1 where key in (128,146,150) group by key; + +explain select distinct key, count(*) from src1 where key in (128,146,150) group by key; +select distinct key, count(*) from src1 where key in (128,146,150) group by key; + +explain select distinct * from (select * from src1) as T; +select distinct * from (select * from src1) as T; + +explain select distinct * from (select count(*) from src1) as T; +select distinct * from (select count(*) from src1) as T; + +explain select distinct * from (select * from src1 where key in (128,146,150)) as T; +select distinct * from (select * from src1 where key in (128,146,150)) as T; + +explain select distinct key from (select * from src1 where key in (128,146,150)) as T; +select distinct key from (select * from src1 where key in (128,146,150)) as T; + +explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T; +select distinct * from (select count(*) from src1 where key in (128,146,150)) as T; + +explain select distinct sum(key) over () from src1; +select distinct sum(key) over () from src1; + 
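+-- A rough sketch of the rewrite these tests exercise (comment only, assuming the plan
+-- shape described in genGBSelectDistinctPlan): with CBO enabled, DISTINCT over an
+-- aggregated or windowed select becomes one extra group-by over every projected
+-- column, so a query such as
+--   select distinct key, count(*) as c from src1 group by key
+-- is planned like the equivalent
+--   select key, c from (select key, count(*) as c from src1 group by key) t group by key, c
+-- where t and c are illustrative names, not identifiers used by this test.
+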
+explain select distinct * from (select sum(key) over () from src1) as T; +select distinct * from (select sum(key) over () from src1) as T; + +explain select distinct count(*)+key from src1 group by key; +select distinct count(*)+key from src1 group by key; + +explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key; +select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key; + +-- should not project the virtual BLOCK_OFFSET et al. columns +explain select distinct * from (select distinct * from src1) as T; +select distinct * from (select distinct * from src1) as T; + diff --git a/ql/src/test/queries/negative/wrong_distinct1.q b/ql/src/test/queries/negative/wrong_distinct1.q deleted file mode 100755 index 1e966ad..0000000 --- a/ql/src/test/queries/negative/wrong_distinct1.q +++ /dev/null @@ -1,3 +0,0 @@ ---! qt:dataset:src -FROM src -INSERT OVERWRITE TABLE dest1 SELECT DISTINCT src.key, substr(src.value,4,1) GROUP BY src.key diff --git a/ql/src/test/results/clientnegative/distinct_missing_groupby.q.out b/ql/src/test/results/clientnegative/distinct_missing_groupby.q.out new file mode 100644 index 0000000..ec36976 --- /dev/null +++ b/ql/src/test/results/clientnegative/distinct_missing_groupby.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10025]: Line 2:16 Expression not in GROUP BY key 'key' diff --git a/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out b/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out deleted file mode 100644 index bafa21f..0000000 --- a/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out +++ /dev/null @@ -1 +0,0 @@ -FAILED: SemanticException 4:36 SELECT DISTINCT and GROUP BY can not be in the same query. 
Error encountered near token 'key' diff --git a/ql/src/test/results/clientnegative/udaf_invalid_place.q.out b/ql/src/test/results/clientnegative/udaf_invalid_place.q.out deleted file mode 100644 index 50880e5..0000000 --- a/ql/src/test/results/clientnegative/udaf_invalid_place.q.out +++ /dev/null @@ -1 +0,0 @@ -FAILED: SemanticException [Error 10128]: Line 2:21 Not yet supported place for UDAF 'sum' diff --git a/ql/src/test/results/clientnegative/wrong_distinct_group_by_without_cbo.q.out b/ql/src/test/results/clientnegative/wrong_distinct_group_by_without_cbo.q.out new file mode 100644 index 0000000..2c97cd8 --- /dev/null +++ b/ql/src/test/results/clientnegative/wrong_distinct_group_by_without_cbo.q.out @@ -0,0 +1 @@ +FAILED: SemanticException SELECT DISTINCT with GROUP BY is only supported with CBO diff --git a/ql/src/test/results/clientpositive/distinct_groupby.q.out b/ql/src/test/results/clientpositive/distinct_groupby.q.out new file mode 100644 index 0000000..fcaf2ff --- /dev/null +++ b/ql/src/test/results/clientpositive/distinct_groupby.q.out @@ -0,0 +1,1796 @@ +PREHOOK: query: explain select distinct key from src1 group by key,value +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct key from src1 group by key,value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct key from src1 group by key,value +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct 
key from src1 group by key,value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### + +128 +146 +150 +213 +224 +238 +255 +273 +278 +311 +369 +401 +406 +66 +98 +PREHOOK: query: explain select distinct count(value) from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct count(value) from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct count(value) from src group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select distinct count(value) 
from src group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +2 +3 +4 +5 +PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +3 +PREHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + 
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain select distinct count(*)+1 from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*)+1 from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count()
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: (_col0 + 1L) (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct count(*)+1 from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*)+1 from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+26
+PREHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            filterExpr: key is not null (type: boolean)
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: string)
+          TableScan
+            alias: b
+            filterExpr: key is not null (type: boolean)
+            Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col1, _col3
+          Statistics: Num rows: 39 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE
+          Group By Operator
+            aggregations: count(_col1), count(_col3)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: _col0 (type: bigint), _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), count(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+37	37
+PREHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+            Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+              Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count()
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col1 (type: bigint)
+            outputColumnNames: _col1
+            Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              keys: _col1 (type: bigint)
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: bigint)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: bigint)
+              Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: bigint)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+1
+PREHOOK: query: explain select distinct key from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: key
+              Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct key from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+128
+146
+150
+213
+224
+238
+255
+273
+278
+311
+369
+401
+406
+66
+98
+PREHOOK: query: explain select distinct * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                keys: key (type: string), value (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+	
+	val_165
+	val_193
+	val_265
+	val_27
+	val_409
+	val_484
+128	
+146	val_146
+150	val_150
+213	val_213
+224	
+238	val_238
+255	val_255
+273	val_273
+278	val_278
+311	val_311
+369	
+401	val_401
+406	val_406
+66	val_66
+98	val_98
+PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+            Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+              Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count()
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col1 (type: bigint)
+            outputColumnNames: _col1
+            Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              keys: _col1 (type: bigint)
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: bigint)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: bigint)
+              Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: bigint)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+1
+PREHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+            Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+              Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count()
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128	1
+146	1
+150	1
+PREHOOK: query: explain select distinct * from (select * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                keys: key (type: string), value (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct * from (select * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+	
+	val_165
+	val_193
+	val_265
+	val_27
+	val_409
+	val_484
+128	
+146	val_146
+150	val_150
+213	val_213
+224	
+238	val_238
+255	val_255
+273	val_273
+278	val_278
+311	val_311
+369	
+401	val_401
+406	val_406
+66	val_66
+98	val_98
+PREHOOK: query: explain select distinct * from (select count(*) from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select count(*) from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct * from (select count(*) from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select count(*) from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+25
+PREHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+            Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+              Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                keys: key (type: string), value (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128	
+146	val_146
+150	val_150
+PREHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+            Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+              Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128
+146
+150
+PREHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+            Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+              Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain select distinct sum(key) over () from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct sum(key) over () from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Reduce Output Operator
+              key expressions: 0 (type: int)
+              sort order: +
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: key (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: string
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: 0 ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col0
+                        name: sum
+                        window function: GenericUDAFSumDouble
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+            Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: sum_window_0 (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                keys: _col0 (type: double)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: double)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: double)
+              Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: double)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct sum(key) over () from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct sum(key) over () from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3556.0
+PREHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Reduce Output Operator
+              key expressions: 0 (type: int)
+              sort order: +
+              Map-reduce partition columns: 0 (type: int)
+              Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: key (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+          PTF Operator
+            Function definitions:
+                Input definition
+                  input alias: ptf_0
+                  output shape: _col0: string
+                  type: WINDOWING
+                Windowing table definition
+                  input alias: ptf_1
+                  name: windowingtablefunction
+                  order by: 0 ASC NULLS FIRST
+                  partition by: 0
+                  raw input shape:
+                  window functions:
+                      window function definition
+                        alias: sum_window_0
+                        arguments: _col0
+                        name: sum
+                        window function: GenericUDAFSumDouble
+                        window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+            Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: sum_window_0 (type: double)
+              outputColumnNames: _col0
+              Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                keys: _col0 (type: double)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: double)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: double)
+              Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: double)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct * from (select sum(key) over () from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select sum(key) over () from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3556.0
+PREHOOK: query: explain select distinct count(*)+key from src1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*)+key from src1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: key
+              Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: count()
+                keys: key (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: (UDFToDouble(_col1) + UDFToDouble(_col0)) (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              keys: _col0 (type: double)
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: double)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: double)
+              Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: double)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct count(*)+key from src1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*)+key from src1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+NULL
+67.0
+99.0
+129.0
+147.0
+151.0
+214.0
+225.0
+239.0
+256.0
+274.0
+279.0
+312.0
+370.0
+402.0
+407.0
+PREHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-3 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: a
+            filterExpr: key is not null (type: boolean)
+            Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: string)
+          TableScan
+            alias: b
+            filterExpr: key is not null (type: boolean)
+            Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
+          outputColumnNames: _col0, _col1, _col3
+          Statistics: Num rows: 39 Data size: 10413 Basic stats: COMPLETE Column stats: COMPLETE
+          Group By Operator
+            aggregations: count(_col1), count(_col3)
+            keys: _col0 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), count(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col1 (type: bigint), _col2 (type: bigint)
+            outputColumnNames: _col1, _col2
+            Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+            Group By Operator
+              keys: _col1 (type: bigint), _col2 (type: bigint)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: bigint), _col1 (type: bigint)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint)
+              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+1	1
+2	2
+3	3
+4	4
+5	5
+PREHOOK: query: explain select distinct * from (select distinct * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select distinct * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src1
+            Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                keys: key (type: string), value (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select distinct * from (select distinct * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select distinct * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+	
+	val_165
+	val_193
+	val_265
+	val_27
+	val_409
+	val_484
+128	
+146	val_146
+150	val_150
+213	val_213
+224	
+238	val_238
+255	val_255
+273	val_273
+278	val_278
+311	val_311
+369	
+401	val_401
+406	val_406
+66	val_66
+98	val_98
diff --git a/ql/src/test/results/compiler/errors/wrong_distinct1.q.out b/ql/src/test/results/compiler/errors/wrong_distinct1.q.out
deleted file mode 100644
index de81b5b..0000000
--- a/ql/src/test/results/compiler/errors/wrong_distinct1.q.out
+++ /dev/null
@@ -1,2 +0,0 @@
-Semantic Exception:
-3:88 SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token 'key'
\ No newline at end of file