diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 83053d1..554df3c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -93,7 +93,7 @@
INVALID_MAPINDEX_CONSTANT(10031, "Non-constant expression for map indexes not supported"),
INVALID_MAPINDEX_TYPE(10032, "MAP key type does not match index expression type"),
NON_COLLECTION_TYPE(10033, "[] not valid on non-collection types"),
- SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"),
+ @Deprecated SELECT_DISTINCT_WITH_GROUPBY(10034, "SELECT DISTINCT and GROUP BY can not be in the same query"),
COLUMN_REPEATED_IN_PARTITIONING_COLS(10035, "Column repeated in partitioning columns"),
DUPLICATE_COLUMN_NAMES(10036, "Duplicate column name:"),
INVALID_BUCKET_NUMBER(10037, "Bucket number should be bigger than zero"),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index de88783..aa9d8bb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -302,6 +302,8 @@
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
import javax.sql.DataSource;
@@ -863,7 +865,7 @@ public Object post(Object t) {
// Now check QB in more detail. canHandleQbForCbo returns null if query can
// be handled.
- msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage, qb);
+ msg = CalcitePlanner.canHandleQbForCbo(queryProperties, conf, true, needToLogMessage);
if (msg == null) {
return Pair.of(true, msg);
}
@@ -891,8 +893,8 @@ public Object post(Object t) {
* Query
* 2. Nested Subquery will return false for qbToChk.getIsQuery()
*/
- static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
- boolean topLevelQB, boolean verbose, QB qb) {
+ private static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf,
+ boolean topLevelQB, boolean verbose) {
if (!queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy()
&& !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript()
@@ -2512,8 +2514,7 @@ private RelNode genSetOpLogicalPlan(Opcode opcode, String alias, String leftalia
throw new SemanticException(ErrorMsg.UNSUPPORTED_SET_OPERATOR.getMsg(opcode.toString()));
}
relToHiveRR.put(setOpRel, setOpOutRR);
- relToHiveColNameCalcitePosMap.put(setOpRel,
- this.buildHiveToCalciteColumnMap(setOpOutRR, setOpRel));
+ relToHiveColNameCalcitePosMap.put(setOpRel, buildHiveToCalciteColumnMap(setOpOutRR));
return setOpRel;
}
@@ -2651,7 +2652,7 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r
if (!RowResolver.add(joinRR, newLeftRR)) {
LOG.warn("Duplicates detected when adding columns to RR: see previous message");
}
- relToHiveColNameCalcitePosMap.put(topRel, this.buildHiveToCalciteColumnMap(joinRR, topRel));
+ relToHiveColNameCalcitePosMap.put(topRel, buildHiveToCalciteColumnMap(joinRR));
relToHiveRR.put(topRel, joinRR);
// Introduce top project operator to remove additional column(s) that have
@@ -2682,7 +2683,7 @@ private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode r
}
// 4. Add new rel & its RR to the maps
- relToHiveColNameCalcitePosMap.put(topRel, this.buildHiveToCalciteColumnMap(topRR, topRel));
+ relToHiveColNameCalcitePosMap.put(topRel, buildHiveToCalciteColumnMap(topRR));
relToHiveRR.put(topRel, topRR);
return topRel;
}
@@ -2735,15 +2736,14 @@ private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map a
if ((left.getToken().getType() == HiveParser.TOK_TABREF)
|| (left.getToken().getType() == HiveParser.TOK_SUBQUERY)
|| (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
- String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName(
- (ASTNode) left.getChild(0)).toLowerCase();
- leftTableAlias = left.getChildCount() == 1 ? tableName : SemanticAnalyzer
- .unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase());
+ String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0)).toLowerCase();
+ leftTableAlias = left.getChildCount() == 1 ? tableName :
+ unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase());
// ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
// partitionTableFunctionSource partitioningSpec? expression*)
// guranteed to have an lias here: check done in processJoin
- leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer
- .unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias;
+ leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ?
+ unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias;
leftRel = aliasToRel.get(leftTableAlias);
} else if (SemanticAnalyzer.isJoinToken(left)) {
leftRel = genJoinLogicalPlan(left, aliasToRel);
@@ -2759,15 +2759,14 @@ private RelNode genJoinLogicalPlan(ASTNode joinParseTree, Map a
if ((right.getToken().getType() == HiveParser.TOK_TABREF)
|| (right.getToken().getType() == HiveParser.TOK_SUBQUERY)
|| (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
- String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName(
- (ASTNode) right.getChild(0)).toLowerCase();
- rightTableAlias = right.getChildCount() == 1 ? tableName : SemanticAnalyzer
- .unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase());
+ String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0)).toLowerCase();
+ rightTableAlias = right.getChildCount() == 1 ? tableName :
+ unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase());
// ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
// partitionTableFunctionSource partitioningSpec? expression*)
// guranteed to have an lias here: check done in processJoin
- rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? SemanticAnalyzer
- .unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias;
+ rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ?
+ unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias;
rightRel = aliasToRel.get(rightTableAlias);
} else if (right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
rightRel = genLateralViewPlans(right, aliasToRel);
@@ -2808,7 +2807,6 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
// Virtual Cols
// 3.1 Add Column info for non partion cols (Object Inspector fields)
- @SuppressWarnings("deprecation")
StructObjectInspector rowObjectInspector = (StructObjectInspector) tabMetaData.getDeserializer()
.getObjectInspector();
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
@@ -2985,8 +2983,7 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
}
// 6. Add Schema(RR) to RelNode-Schema map
- ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr,
- tableRel);
+ ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr);
relToHiveRR.put(tableRel, rr);
relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap);
} catch (Exception e) {
@@ -3173,17 +3170,16 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al
case HiveParser.TOK_TABREF:
case HiveParser.TOK_SUBQUERY:
case HiveParser.TOK_PTBLFUNCTION:
- String inputTableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName(
- (ASTNode) next.getChild(0)).toLowerCase();
+ String inputTableName = getUnescapedUnqualifiedTableName((ASTNode) next.getChild(0)).toLowerCase();
String inputTableAlias;
if (next.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
// ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
// partitionTableFunctionSource partitioningSpec? expression*)
// ptf node guaranteed to have an alias here
- inputTableAlias = SemanticAnalyzer.unescapeIdentifier(next.getChild(1).getText().toLowerCase());
+ inputTableAlias = unescapeIdentifier(next.getChild(1).getText().toLowerCase());
} else {
inputTableAlias = next.getChildCount() == 1 ? inputTableName :
- SemanticAnalyzer.unescapeIdentifier(next.getChild(next.getChildCount() - 1).getText().toLowerCase());
+ unescapeIdentifier(next.getChild(next.getChildCount() - 1).getText().toLowerCase());
}
inputRel = aliasToRel.get(inputTableAlias);
break;
@@ -3220,8 +3216,6 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al
(ASTNode) functionCall.getChild(1);
// Output types. They will be the concatenation of the input refs types and
// the types of the expressions for the lateral view generated rows
- List outputFieldTypes = new ArrayList<>(inputRefsTypes);
- List outputFieldNames = new ArrayList<>(inputRel.getRowType().getFieldNames());
// Generate all expressions from lateral view
ExprNodeDesc valuesExpr = genExprNodeDesc(valuesClause, inputRR, false);
RexCall convertedOriginalValuesExpr = (RexCall) new RexNodeConverter(this.cluster, inputRel.getRowType(),
@@ -3293,7 +3287,6 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al
columnAliases.add(SemanticAnalyzer.getColumnInternalName(i));
}
}
- int numInputExprs = inputRR.getColumnInfos().size();
ListTypeInfo listTypeInfo = (ListTypeInfo) valuesExpr.getTypeInfo(); // Array should have ListTypeInfo
StructTypeInfo typeInfos = (StructTypeInfo) listTypeInfo.getListElementTypeInfo(); // Within the list, we extract types
for (int i = 0, j = 0; i < columnAliases.size(); i++) {
@@ -3305,8 +3298,7 @@ private RelNode genLateralViewPlans(ASTNode lateralView, Map al
new ColumnInfo(internalColName, typeInfos.getAllStructFieldTypeInfos().get(i),
tableAlias, false));
}
- this.relToHiveColNameCalcitePosMap
- .put(htfsRel, buildHiveToCalciteColumnMap(outputRR, htfsRel));
+ this.relToHiveColNameCalcitePosMap.put(htfsRel, buildHiveToCalciteColumnMap(outputRR));
this.relToHiveRR.put(htfsRel, outputRR);
// 4) Return new operator
@@ -3378,8 +3370,8 @@ private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean
}
private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
- Map aliasToRel, ImmutableMap outerNameToPosMap,
- RowResolver outerRR, boolean forHavingClause) throws SemanticException {
+ ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean forHavingClause)
+ throws SemanticException {
Map subQueryToRelNode = new HashMap<>();
boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause,
@@ -3406,39 +3398,15 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
}
}
- private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException {
- RowResolver iRR = relToHiveRR.get(srcRel);
- RowResolver oRR = new RowResolver();
- RowResolver.add(oRR, iRR, numColumns);
-
- List calciteColLst = new ArrayList();
- List oFieldNames = new ArrayList();
- RelDataType iType = srcRel.getRowType();
-
- for (int i = 0; i < iType.getFieldCount(); i++) {
- RelDataTypeField fType = iType.getFieldList().get(i);
- String fName = iType.getFieldNames().get(i);
- calciteColLst.add(cluster.getRexBuilder().makeInputRef(fType.getType(), i));
- oFieldNames.add(fName);
- }
-
- HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, oFieldNames);
-
- this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(oRR, selRel));
- this.relToHiveRR.put(selRel, oRR);
- return selRel;
- }
-
- private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel,
- ImmutableMap outerNameToPosMap, RowResolver outerRR,
- boolean forHavingClause) throws SemanticException {
+ private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, ImmutableMap outerNameToPosMap,
+ RowResolver outerRR, boolean forHavingClause) throws SemanticException {
RelNode filterRel = null;
Iterator whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values()
.iterator();
if (whereClauseIterator.hasNext()) {
filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel,
- aliasToRel, outerNameToPosMap, outerRR, forHavingClause);
+ outerNameToPosMap, outerRR, forHavingClause);
}
return filterRel;
@@ -3462,9 +3430,8 @@ private AggInfo(List aggParams, TypeInfo returnType, String udfNam
}
}
- private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List gbChildProjLst,
- RexNodeConverter converter, HashMap rexNodeToPosMap,
- Integer childProjLstIndx) throws SemanticException {
+ private AggregateCall convertGBAgg(AggInfo agg, List gbChildProjLst, RexNodeConverter converter,
+ HashMap rexNodeToPosMap, Integer childProjLstIndx) throws SemanticException {
// 1. Get agg fn ret type in Calcite
RelDataType aggFnRetType = TypeConverter.convert(agg.m_returnType,
@@ -3538,7 +3505,7 @@ private RelNode genGBRelNode(List gbExprs, List aggInfoLs
List aggregateCalls = Lists.newArrayList();
for (AggInfo agg : aggInfoLst) {
- aggregateCalls.add(convertGBAgg(agg, srcRel, gbChildProjLst, converter, rexNodeToPosMap,
+ aggregateCalls.add(convertGBAgg(agg, gbChildProjLst, converter, rexNodeToPosMap,
gbChildProjLst.size()));
}
if (hasGroupSets) {
@@ -3583,12 +3550,12 @@ private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo,
RowResolver gByInputRR, RowResolver gByRR) {
if (gByExpr.getType() == HiveParser.DOT
&& gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL) {
- String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getChild(0)
+ String tab_alias = unescapeIdentifier(gByExpr.getChild(0).getChild(0)
.getText().toLowerCase());
- String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(1).getText().toLowerCase());
+ String col_alias = unescapeIdentifier(gByExpr.getChild(1).getText().toLowerCase());
gByRR.put(tab_alias, col_alias, colInfo);
} else if (gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL) {
- String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr.getChild(0).getText().toLowerCase());
+ String col_alias = unescapeIdentifier(gByExpr.getChild(0).getText().toLowerCase());
String tab_alias = null;
/*
* If the input to the GBy has a tab alias for the column, then add an
@@ -3642,7 +3609,7 @@ private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver
TypeInfo udafRetType = null;
// 3.1 Obtain UDAF name
- String aggName = SemanticAnalyzer.unescapeIdentifier(aggAst.getChild(0).getText());
+ String aggName = unescapeIdentifier(aggAst.getChild(0).getText());
// 3.2 Rank functions type is 'int'/'double'
if (FunctionRegistry.isRankingFunction(aggName)) {
@@ -3819,7 +3786,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
for (ASTNode value : aggregationTrees.values()) {
// 6.1 Determine type of UDAF
// This is the GenericUDAF name
- String aggName = SemanticAnalyzer.unescapeIdentifier(value.getChild(0).getText());
+ String aggName = unescapeIdentifier(value.getChild(0).getText());
boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI;
boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
@@ -3861,8 +3828,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException
// 8. We create the group_by operator
gbRel = genGBRelNode(gbExprNDescLst, aggregations, groupingSets, srcRel);
- relToHiveColNameCalcitePosMap.put(gbRel,
- buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel));
+ relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver));
this.relToHiveRR.put(gbRel, groupByOutputRowResolver);
}
@@ -4069,8 +4035,7 @@ public RexNode apply(RelDataTypeField input) {
// rowtype of sortrel is the type of it child; if child happens to be
// synthetic project that we introduced then that projectrel would
// contain the vc.
- ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(
- outputRR, sortRel);
+ ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
relToHiveRR.put(sortRel, outputRR);
relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
@@ -4112,8 +4077,7 @@ private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticExcept
RowResolver inputRR = relToHiveRR.get(srcRel);
RowResolver outputRR = inputRR.duplicate();
- ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(
- outputRR, sortRel);
+ ImmutableMap hiveColNameCalcitePosMap = buildHiveToCalciteColumnMap(outputRR);
relToHiveRR.put(sortRel, outputRR);
relToHiveColNameCalcitePosMap.put(sortRel, hiveColNameCalcitePosMap);
}
@@ -4166,7 +4130,7 @@ private RelNode genLimitLogicalPlan(QB qb, RelNode srcRel) throws SemanticExcept
return oKeys;
}
- private RexWindowBound getBound(BoundarySpec bs, RexNodeConverter converter) {
+ private RexWindowBound getBound(BoundarySpec bs) {
RexWindowBound rwb = null;
if (bs != null) {
@@ -4220,8 +4184,8 @@ private int getWindowSpecIndx(ASTNode wndAST) {
return wi;
}
- private Pair genWindowingProj(QB qb, WindowExpressionSpec wExpSpec,
- RelNode srcRel) throws SemanticException {
+ private Pair genWindowingProj(WindowExpressionSpec wExpSpec, RelNode srcRel)
+ throws SemanticException {
RexNode w = null;
TypeInfo wHiveRetType = null;
@@ -4262,8 +4226,8 @@ private int getWindowSpecIndx(ASTNode wndAST) {
WindowSpec wndSpec = ((WindowFunctionSpec) wExpSpec).getWindowSpec();
List partitionKeys = getPartitionKeys(wndSpec.getPartition(), converter, inputRR);
List orderKeys = getOrderKeys(wndSpec.getOrder(), converter, inputRR);
- RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getStart(), converter);
- RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getEnd(), converter);
+ RexWindowBound upperBound = getBound(wndSpec.getWindowFrame().getStart());
+ RexWindowBound lowerBound = getBound(wndSpec.getWindowFrame().getEnd());
boolean isRows = wndSpec.getWindowFrame().getWindowType() == WindowType.ROWS;
w = cluster.getRexBuilder().makeOver(calciteAggFnRetType, calciteAggFn, calciteAggFnArgs,
@@ -4315,7 +4279,7 @@ private RelNode genSelectForWindowing(QB qb, RelNode srcRel, HashSet
!cubeRollupGrpSetPresent));
}
if (out_rwsch.getExpression(wExprSpec.getExpression()) == null) {
- Pair wtp = genWindowingProj(qb, wExprSpec, srcRel);
+ Pair wtp = genWindowingProj(wExprSpec, srcRel);
projsForWindowSelOp.add(wtp.getKey());
// 6.2.2 Update Output Row Schema
@@ -4388,8 +4352,7 @@ private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rw
HiveRelNode selRel = HiveProject.create(srcRel, calciteColLst, columnNames);
// 4. Keep track of colname-to-posmap && RR for new select
- this.relToHiveColNameCalcitePosMap
- .put(selRel, buildHiveToCalciteColumnMap(out_rwsch, selRel));
+ this.relToHiveColNameCalcitePosMap.put(selRel, buildHiveToCalciteColumnMap(out_rwsch));
this.relToHiveRR.put(selRel, out_rwsch);
return selRel;
@@ -4413,26 +4376,31 @@ private void setQueryHints(QB qb) throws SemanticException {
}
}
+ private Pair<RelNode, RowResolver> genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
+ ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
+ throws SemanticException {
+ Pair<RelNode, RowResolver> retNodeRR = internalGenSelectLogicalPlan(qb, srcRel, starSrcRel, outerNameToPosMap,
+ outerRR, isAllColRefRewrite);
+
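+ // If the query is a SELECT DISTINCT that also has a GROUP BY sibling, place an extra
+ // aggregate on top of the generated select to implement the DISTINCT (genGBSelectDistinctPlan).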
+ QBParseInfo qbp = getQBParseInfo(qb);
+ String selClauseName = qbp.getClauseNames().iterator().next();
+ ASTNode selExprList = qbp.getSelForClause(selClauseName);
+ if (isSelectDistinct(selExprList) && hasGroupBySibling(selExprList)) {
+ retNodeRR = genGBSelectDistinctPlan(retNodeRR);
+ }
+
+ return retNodeRR;
+ }
+
/**
- * NOTE: there can only be one select caluse since we don't handle multi
- * destination insert.
- *
- * @throws SemanticException
- */
- /**
- * @param qb
- * @param srcRel
- * @param starSrcRel
- * @param outerNameToPosMap
- * @param outerRR
+ * NOTE: there can only be one select clause since we don't handle multi destination insert.
* @param isAllColRefRewrite
* when it is true, it means that it is called from group by *, where we use
* genSelectLogicalPlan to rewrite *
* @return RelNode: the select relnode RowResolver: i.e., originalRR, the RR after select when there is an order by.
- * @throws SemanticException
*/
- private Pair genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
- ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
+ private Pair internalGenSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
+ ImmutableMap outerNameToPosMap, RowResolver outerRR, boolean isAllColRefRewrite)
throws SemanticException {
// 0. Generate a Select Node for Windowing
// Exclude the newly-generated select columns from */etc. resolution.
@@ -4606,36 +4574,35 @@ private void setQueryHints(QB qb) throws SemanticException {
// 6.4 Build ExprNode corresponding to colums
if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
- pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : SemanticAnalyzer
- .getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list,
+ pos = genColListRegex(".*", expr.getChildCount() == 0 ? null :
+ getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list,
excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(), true);
selectStar = true;
} else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL
&& !hasAsClause
&& !inputRR.getIsExprResolver()
&& SemanticAnalyzer.isRegex(
- SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf)) {
+ unescapeIdentifier(expr.getChild(0).getText()), conf)) {
// In case the expression is a regex COL.
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
- pos = genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()),
- null, expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch,
- qb.getAliases(), true);
+ pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), null, expr, col_list, excludedColumns,
+ inputRR, starRR, pos, out_rwsch, qb.getAliases(), true);
} else if (expr.getType() == HiveParser.DOT
&& expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
- && inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0)
+ && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0)
.getChild(0).getText().toLowerCase()))
&& !hasAsClause
&& !inputRR.getIsExprResolver()
&& SemanticAnalyzer.isRegex(
- SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf)) {
+ unescapeIdentifier(expr.getChild(1).getText()), conf)) {
// In case the expression is TABLE.COL (col can be regex).
// This can only happen without AS clause
// We don't allow this for ExprResolver - the Group By case
pos = genColListRegex(
- SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()),
- SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText()
- .toLowerCase()), expr, col_list, excludedColumns, inputRR, starRR, pos,
+ unescapeIdentifier(expr.getChild(1).getText()),
+ unescapeIdentifier(expr.getChild(0).getChild(0).getText().toLowerCase()),
+ expr, col_list, excludedColumns, inputRR, starRR, pos,
out_rwsch, qb.getAliases(), true);
} else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI)
&& !(srcRel instanceof HiveAggregate)) {
@@ -4701,7 +4668,13 @@ private void setQueryHints(QB qb) throws SemanticException {
// TODO: support unselected columns in genericUDTF and windowing functions.
// We examine the order by in this query block and adds in column needed
// by order by in select list.
- if (obAST != null && !(selForWindow != null && selExprList.getToken().getType() == HiveParser.TOK_SELECTDI) && !isAllColRefRewrite) {
+ //
+ // If DISTINCT is present, it is not possible to ORDER BY unselected
+ // columns, and in fact adding all columns would change the behavior of
+ // DISTINCT, so we bypass this logic.
+ if (obAST != null
+ && selExprList.getToken().getType() != HiveParser.TOK_SELECTDI
+ && !isAllColRefRewrite) {
// 1. OB Expr sanity test
// in strict mode, in the presence of order by, limit must be
// specified
@@ -4754,8 +4727,7 @@ public RexNode apply(RelDataTypeField input) {
colInfo.getType(), colInfo.getTabAlias(), colInfo.getIsVirtualCol());
groupByOutputRowResolver.put(colInfo.getTabAlias(), colInfo.getAlias(), newColInfo);
}
- relToHiveColNameCalcitePosMap.put(outputRel,
- buildHiveToCalciteColumnMap(groupByOutputRowResolver, outputRel));
+ relToHiveColNameCalcitePosMap.put(outputRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver));
this.relToHiveRR.put(outputRel, groupByOutputRowResolver);
}
@@ -4870,12 +4842,34 @@ private RelNode genUDTFPlan(GenericUDTF genericUDTF, String genericUDTFName, Str
RelNode udtf = HiveTableFunctionScan.create(cluster, traitSet, list, rexNode, null, retType,
null);
// Add new rel & its RR to the maps
- relToHiveColNameCalcitePosMap.put(udtf, this.buildHiveToCalciteColumnMap(out_rwsch, udtf));
+ relToHiveColNameCalcitePosMap.put(udtf, buildHiveToCalciteColumnMap(out_rwsch));
relToHiveRR.put(udtf, out_rwsch);
return udtf;
}
+ private Pair<RelNode, RowResolver> genGBSelectDistinctPlan(Pair<RelNode, RowResolver> srcNodeRR)
+ throws SemanticException {
+ RelNode srcRel = srcNodeRR.left;
+
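+ // Implement the DISTINCT by grouping on every column of the input: an aggregate whose group
+ // set covers all fields and that has no aggregate calls simply removes duplicate rows.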
+ RelDataType inputRT = srcRel.getRowType();
+ List<Integer> groupSetPositions =
+ IntStream.range(0, inputRT.getFieldCount()).boxed().collect(Collectors.toList());
+
+ HiveAggregate distAgg = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), srcRel,
+ ImmutableBitSet.of(groupSetPositions), null, new ArrayList<AggregateCall>());
+
+ // This comes from genSelectLogicalPlan, must be a project
+ assert srcRel instanceof HiveProject;
+ RowResolver outputRR = srcNodeRR.right;
+ if (outputRR == null) {
+ outputRR = relToHiveRR.get(srcRel);
+ }
+
+ relToHiveRR.put(distAgg, outputRR);
+ relToHiveColNameCalcitePosMap.put(distAgg, relToHiveColNameCalcitePosMap.get(srcRel));
+ return new Pair<RelNode, RowResolver>(distAgg, outputRR);
+ }
+
private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException {
switch (qbexpr.getOpcode()) {
case NULLOP:
@@ -4910,7 +4904,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
// 0. Check if we can handle the SubQuery;
// canHandleQbForCbo returns null if the query can be handled.
- String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled(), qb);
+ String reason = canHandleQbForCbo(queryProperties, conf, false, LOG.isDebugEnabled());
if (reason != null) {
String msg = "CBO can not handle Sub Query";
if (LOG.isDebugEnabled()) {
@@ -4973,7 +4967,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
}
// 2. Build Rel for where Clause
- filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, outerNameToPosMap, outerRR, false);
+ filterRel = genFilterLogicalPlan(qb, srcRel, outerNameToPosMap, outerRR, false);
srcRel = (filterRel == null) ? srcRel : filterRel;
RelNode starSrcRel = srcRel;
@@ -4982,7 +4976,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
srcRel = (gbRel == null) ? srcRel : gbRel;
// 4. Build Rel for GB Having Clause
- gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel);
+ gbHavingRel = genGBHavingLogicalPlan(qb, srcRel);
srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel;
// 5. Build Rel for Select Clause
@@ -5016,7 +5010,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
newRR.putWithCheck(alias, tmp[1], colInfo.getInternalName(), newCi);
}
relToHiveRR.put(srcRel, newRR);
- relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR, srcRel));
+ relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR));
}
if (LOG.isDebugEnabled()) {
@@ -5027,8 +5021,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
return srcRel;
}
- private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map aliasToRel)
- throws SemanticException {
+ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
RelNode gbFilter = null;
QBParseInfo qbp = getQBParseInfo(qb);
String destClauseName = qbp.getClauseNames().iterator().next();
@@ -5049,7 +5042,7 @@ private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel, Map buildHiveToCalciteColumnMap(RowResolver rr, RelNode rNode) {
+ private ImmutableMap buildHiveToCalciteColumnMap(RowResolver rr) {
ImmutableMap.Builder b = new ImmutableMap.Builder();
for (ColumnInfo ci : rr.getRowSchema().getSignature()) {
b.put(ci.getInternalName(), rr.getPosition(ci.getInternalName()));
@@ -5140,16 +5133,6 @@ public Object post(Object t) {
private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException {
return qb.getParseInfo();
}
-
- private List getTabAliases(RowResolver inputRR) {
- List tabAliases = new ArrayList(); // TODO: this should be
- // unique
- for (ColumnInfo ci : inputRR.getColumnInfos()) {
- tabAliases.add(ci.getTabAlias());
- }
-
- return tabAliases;
- }
}
/**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 8d1309d..214e38a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -26,6 +26,7 @@
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
@@ -1727,10 +1728,6 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
if (qbp.getJoinExpr() != null) {
queryProperties.setHasJoinFollowedByGroupBy(true);
}
- if (qbp.getSelForClause(ctx_1.dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
- throw new SemanticException(generateErrorMessage(ast,
- ErrorMsg.SELECT_DISTINCT_WITH_GROUPBY.getMsg()));
- }
qbp.setGroupByExprForClause(ctx_1.dest, ast);
skipRecursion = true;
@@ -4194,30 +4191,32 @@ public static long unsetBit(long bitmap, int bitIdx) {
}
/**
- * This function is a wrapper of parseInfo.getGroupByForClause which
- * automatically translates SELECT DISTINCT a,b,c to SELECT a,b,c GROUP BY
- * a,b,c.
+ * Returns the GROUP BY expressions, if present;
+ * DISTINCT, if present, will be handled when generating the SELECT.
*/
List getGroupByForClause(QBParseInfo parseInfo, String dest) throws SemanticException {
- if (parseInfo.getSelForClause(dest).getToken().getType() == HiveParser.TOK_SELECTDI) {
- ASTNode selectExprs = parseInfo.getSelForClause(dest);
- List result = new ArrayList(selectExprs == null ? 0
- : selectExprs.getChildCount());
- if (selectExprs != null) {
- for (int i = 0; i < selectExprs.getChildCount(); ++i) {
- if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
- continue;
- }
- // table.column AS alias
- ASTNode grpbyExpr = (ASTNode) selectExprs.getChild(i).getChild(0);
- result.add(grpbyExpr);
+ ASTNode selectExpr = parseInfo.getSelForClause(dest);
+ Collection<ASTNode> aggregateFunction = parseInfo.getDestToAggregationExprs().get(dest).values();
+ if (!(this instanceof CalcitePlanner) && isSelectDistinct(selectExpr) && hasGroupBySibling(selectExpr)) {
+ throw new SemanticException("SELECT DISTINCT with GROUP BY is only supported with CBO");
+ }
+
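+ // A plain SELECT DISTINCT (no GROUP BY sibling and no aggregate in the select list) is still
+ // rewritten into a GROUP BY over the select expressions, as before.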
+ if (isSelectDistinct(selectExpr) && !hasGroupBySibling(selectExpr) &&
+ !isAggregateInSelect(selectExpr, aggregateFunction)) {
+ List<ASTNode> result = new ArrayList<ASTNode>(selectExpr.getChildCount());
+ for (int i = 0; i < selectExpr.getChildCount(); ++i) {
+ if (((ASTNode) selectExpr.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
+ continue;
}
+ // table.column AS alias
+ ASTNode grpbyExpr = (ASTNode) selectExpr.getChild(i).getChild(0);
+ result.add(grpbyExpr);
}
return result;
} else {
+ // look for a true GBY
ASTNode grpByExprs = parseInfo.getGroupByForClause(dest);
- List result = new ArrayList(grpByExprs == null ? 0
- : grpByExprs.getChildCount());
+ List<ASTNode> result = new ArrayList<ASTNode>(grpByExprs == null ? 0 : grpByExprs.getChildCount());
if (grpByExprs != null) {
for (int i = 0; i < grpByExprs.getChildCount(); ++i) {
ASTNode grpbyExpr = (ASTNode) grpByExprs.getChild(i);
@@ -4230,6 +4229,35 @@ public static long unsetBit(long bitmap, int bitIdx) {
}
}
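+ /**
+ * Returns true if the given SELECT node has a GROUP BY (TOK_GROUPBY) sibling in the parse tree.
+ */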
+ protected boolean hasGroupBySibling(ASTNode selectExpr) {
+ boolean isGroupBy = false;
+ if (selectExpr.getParent() != null && selectExpr.getParent() instanceof Node) {
+ for (Node sibling : ((Node)selectExpr.getParent()).getChildren()) {
+ isGroupBy |= sibling instanceof ASTNode && ((ASTNode)sibling).getType() == HiveParser.TOK_GROUPBY;
+ }
+ }
+
+ return isGroupBy;
+ }
+
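+ /**
+ * Returns true if the given select expression node is a SELECT DISTINCT (TOK_SELECTDI).
+ */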
+ protected boolean isSelectDistinct(ASTNode expr) {
+ return expr.getType() == HiveParser.TOK_SELECTDI;
+ }
+
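+ /**
+ * Recursively checks whether any expression in the given subtree is one of the aggregate
+ * function expressions collected for the clause.
+ */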
+ protected boolean isAggregateInSelect(Node node, Collection<ASTNode> aggregateFunction) {
+ if (node.getChildren() == null) {
+ return false;
+ }
+
+ for (Node child : node.getChildren()) {
+ if (aggregateFunction.contains(child) || isAggregateInSelect(child, aggregateFunction)) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
static String[] getColAlias(ASTNode selExpr, String defaultName,
RowResolver inputRR, boolean includeFuncName, int colNum) {
String colAlias = null;
diff --git a/ql/src/test/queries/clientnegative/distinct_missing_groupby.q b/ql/src/test/queries/clientnegative/distinct_missing_groupby.q
new file mode 100644
index 0000000..3411aa2
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/distinct_missing_groupby.q
@@ -0,0 +1,2 @@
+--! qt:dataset:src
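+-- selecting key next to an aggregate without a GROUP BY must fail: key is not a grouping key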
+select distinct key, sum(key) from src;
diff --git a/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q b/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q
deleted file mode 100644
index cf0ac4b..0000000
--- a/ql/src/test/queries/clientnegative/selectDistinctStarNeg_2.q
+++ /dev/null
@@ -1,4 +0,0 @@
---! qt:dataset:src
--- SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token ‘key’
-
-select distinct * from src group by key;
\ No newline at end of file
diff --git a/ql/src/test/queries/clientnegative/udaf_invalid_place.q b/ql/src/test/queries/clientnegative/udaf_invalid_place.q
deleted file mode 100644
index 3411aa2..0000000
--- a/ql/src/test/queries/clientnegative/udaf_invalid_place.q
+++ /dev/null
@@ -1,2 +0,0 @@
---! qt:dataset:src
-select distinct key, sum(key) from src;
diff --git a/ql/src/test/queries/clientnegative/wrong_distinct_group_by_without_cbo.q b/ql/src/test/queries/clientnegative/wrong_distinct_group_by_without_cbo.q
new file mode 100644
index 0000000..bb614fe
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/wrong_distinct_group_by_without_cbo.q
@@ -0,0 +1,5 @@
+--! qt:dataset:src
+
+set hive.cbo.enable=false;
+
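+-- SELECT DISTINCT together with GROUP BY is only supported through CBO, so this must fail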
+select distinct key from src group by key
diff --git a/ql/src/test/queries/clientpositive/distinct_groupby.q b/ql/src/test/queries/clientpositive/distinct_groupby.q
new file mode 100644
index 0000000..abfef07
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/distinct_groupby.q
@@ -0,0 +1,76 @@
+--! qt:dataset:src
+--! qt:dataset:src1
+
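+-- SELECT DISTINCT combined with GROUP BY, aggregates, joins, subqueries and windowing
+-- functions; with CBO this is planned by adding a final aggregate over the select output
+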
+explain select distinct key from src1 group by key,value;
+select distinct key from src1 group by key,value;
+
+explain select distinct count(value) from src group by key;
+select distinct count(value) from src group by key;
+
+explain select distinct count(*) from src1 where key in (128,146,150);
+select distinct count(*) from src1 where key in (128,146,150);
+
+explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T;
+select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T;
+
+explain select distinct count(*)+1 from src1;
+select distinct count(*)+1 from src1;
+
+explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key;
+select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key;
+
+explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a;
+select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a;
+
+explain select distinct key from src1;
+select distinct key from src1;
+
+explain select distinct * from src1;
+select distinct * from src1;
+
+explain select distinct count(*) from src1 where key in (128,146,150) group by key;
+select distinct count(*) from src1 where key in (128,146,150) group by key;
+
+explain select distinct key, count(*) from src1 where key in (128,146,150) group by key;
+select distinct key, count(*) from src1 where key in (128,146,150) group by key;
+
+explain select distinct * from (select * from src1) as T;
+select distinct * from (select * from src1) as T;
+
+explain select distinct * from (select count(*) from src1) as T;
+select distinct * from (select count(*) from src1) as T;
+
+explain select distinct * from (select * from src1 where key in (128,146,150)) as T;
+select distinct * from (select * from src1 where key in (128,146,150)) as T;
+
+explain select distinct key from (select * from src1 where key in (128,146,150)) as T;
+select distinct key from (select * from src1 where key in (128,146,150)) as T;
+
+explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T;
+select distinct * from (select count(*) from src1 where key in (128,146,150)) as T;
+
+explain select distinct sum(key) over () from src1;
+select distinct sum(key) over () from src1;
+
+explain select distinct * from (select sum(key) over () from src1) as T;
+select distinct * from (select sum(key) over () from src1) as T;
+
+explain select distinct value, key, count(1) over (partition by value) from src1;
+select distinct value, key, count(1) over (partition by value) from src1;
+
+explain select value, key, count(1) over (partition by value) from src1 group by value, key;
+select value, key, count(1) over (partition by value) from src1 group by value, key;
+
+explain select value, key, count(1) over (partition by value) from src1;
+select value, key, count(1) over (partition by value) from src1;
+
+explain select distinct count(*)+key from src1 group by key;
+select distinct count(*)+key from src1 group by key;
+
+explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key;
+select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key;
+
+-- should not project the virtual BLOCK_OFFSET et al. columns
+explain select distinct * from (select distinct * from src1) as T;
+select distinct * from (select distinct * from src1) as T;
+
diff --git a/ql/src/test/queries/negative/wrong_distinct1.q b/ql/src/test/queries/negative/wrong_distinct1.q
deleted file mode 100755
index 1e966ad..0000000
--- a/ql/src/test/queries/negative/wrong_distinct1.q
+++ /dev/null
@@ -1,3 +0,0 @@
---! qt:dataset:src
-FROM src
-INSERT OVERWRITE TABLE dest1 SELECT DISTINCT src.key, substr(src.value,4,1) GROUP BY src.key
diff --git a/ql/src/test/results/clientnegative/distinct_missing_groupby.q.out b/ql/src/test/results/clientnegative/distinct_missing_groupby.q.out
new file mode 100644
index 0000000..ec36976
--- /dev/null
+++ b/ql/src/test/results/clientnegative/distinct_missing_groupby.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10025]: Line 2:16 Expression not in GROUP BY key 'key'
diff --git a/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out b/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out
deleted file mode 100644
index bafa21f..0000000
--- a/ql/src/test/results/clientnegative/selectDistinctStarNeg_2.q.out
+++ /dev/null
@@ -1 +0,0 @@
-FAILED: SemanticException 4:36 SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token 'key'
diff --git a/ql/src/test/results/clientnegative/udaf_invalid_place.q.out b/ql/src/test/results/clientnegative/udaf_invalid_place.q.out
deleted file mode 100644
index 50880e5..0000000
--- a/ql/src/test/results/clientnegative/udaf_invalid_place.q.out
+++ /dev/null
@@ -1 +0,0 @@
-FAILED: SemanticException [Error 10128]: Line 2:21 Not yet supported place for UDAF 'sum'
diff --git a/ql/src/test/results/clientnegative/wrong_distinct_group_by_without_cbo.q.out b/ql/src/test/results/clientnegative/wrong_distinct_group_by_without_cbo.q.out
new file mode 100644
index 0000000..2c97cd8
--- /dev/null
+++ b/ql/src/test/results/clientnegative/wrong_distinct_group_by_without_cbo.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException SELECT DISTINCT with GROUP BY is only supported with CBO
diff --git a/ql/src/test/results/clientpositive/distinct_groupby.q.out b/ql/src/test/results/clientpositive/distinct_groupby.q.out
new file mode 100644
index 0000000..b7a0e68
--- /dev/null
+++ b/ql/src/test/results/clientpositive/distinct_groupby.q.out
@@ -0,0 +1,2158 @@
+PREHOOK: query: explain select distinct key from src1 group by key,value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key from src1 group by key,value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct key from src1 group by key,value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key from src1 group by key,value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+128
+146
+150
+213
+224
+238
+255
+273
+278
+311
+369
+401
+406
+66
+98
+PREHOOK: query: explain select distinct count(value) from src group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(value) from src group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(value)
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 125 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(value) from src group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(value) from src group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+1
+2
+3
+4
+5
+PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select distinct count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain select distinct count(*)+1 from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*)+1 from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 + 1L) (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(*)+1 from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*)+1 from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+26
+PREHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: b
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col1, _col3
+ Statistics: Num rows: 39 Data size: 7020 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col1), count(_col3)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+37 37
+PREHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct c from (select distinct key, count(*) as c from src1 where key in (128,146,150) group by key) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+1
+PREHOOK: query: explain select distinct key from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct key from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+128
+146
+150
+213
+224
+238
+255
+273
+278
+311
+369
+401
+406
+66
+98
+PREHOOK: query: explain select distinct * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+ val_165
+ val_193
+ val_265
+ val_27
+ val_409
+ val_484
+128
+146 val_146
+150 val_150
+213 val_213
+224
+238 val_238
+255 val_255
+273 val_273
+278 val_278
+311 val_311
+369
+401 val_401
+406 val_406
+66 val_66
+98 val_98
+PREHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+1
+PREHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key, count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key, count(*) from src1 where key in (128,146,150) group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128 1
+146 1
+150 1
+PREHOOK: query: explain select distinct * from (select * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+ val_165
+ val_193
+ val_265
+ val_27
+ val_409
+ val_484
+128
+146 val_146
+150 val_150
+213 val_213
+224
+238 val_238
+255 val_255
+273 val_273
+278 val_278
+311 val_311
+369
+401 val_401
+406 val_406
+66 val_66
+98 val_98
+PREHOOK: query: explain select distinct * from (select count(*) from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select count(*) from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select count(*) from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select count(*) from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+25
+PREHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 1050 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128
+146 val_146
+150 val_150
+PREHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct key from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct key from (select * from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128
+146
+150
+PREHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ filterExpr: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (UDFToDouble(key)) IN (128.0D, 146.0D, 150.0D) (type: boolean)
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select count(*) from src1 where key in (128,146,150)) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3
+PREHOOK: query: explain select distinct sum(key) over () from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct sum(key) over () from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: 0 (type: int)
+ sort order: +
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: 0 ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col0
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: sum_window_0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct sum(key) over () from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct sum(key) over () from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3556.0
+PREHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select sum(key) over () from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: 0 (type: int)
+ sort order: +
+ Map-reduce partition columns: 0 (type: int)
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: 0 ASC NULLS FIRST
+ partition by: 0
+ raw input shape:
+ window functions:
+ window function definition
+ alias: sum_window_0
+ arguments: _col0
+ name: sum
+ window function: GenericUDAFSumDouble
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: sum_window_0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 25 Data size: 8850 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select sum(key) over () from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select sum(key) over () from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+3556.0
+PREHOOK: query: explain select distinct value, key, count(1) over (partition by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct value, key, count(1) over (partition by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: count_window_0
+ arguments: 1
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct value, key, count(1) over (partition by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct value, key, count(1) over (partition by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 7
+ 128 7
+ 224 7
+ 369 7
+val_146 146 1
+val_150 150 1
+val_165 1
+val_193 1
+val_213 213 1
+val_238 238 1
+val_255 255 1
+val_265 1
+val_27 1
+val_273 273 1
+val_278 278 1
+val_311 311 1
+val_401 401 1
+val_406 406 1
+val_409 1
+val_484 1
+val_66 66 1
+val_98 98 1
+PREHOOK: query: explain select value, key, count(1) over (partition by value) from src1 group by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select value, key, count(1) over (partition by value) from src1 group by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: count_window_0
+ arguments: 1
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value, key, count(1) over (partition by value) from src1 group by value, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select value, key, count(1) over (partition by value) from src1 group by value, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 224 4
+ 128 4
+ 369 4
+ 4
+val_146 146 1
+val_150 150 1
+val_165 1
+val_193 1
+val_213 213 1
+val_238 238 1
+val_255 255 1
+val_265 1
+val_27 1
+val_273 273 1
+val_278 278 1
+val_311 311 1
+val_401 401 1
+val_406 406 1
+val_409 1
+val_484 1
+val_66 66 1
+val_98 98 1
+PREHOOK: query: explain select value, key, count(1) over (partition by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select value, key, count(1) over (partition by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: value (type: string)
+ sort order: +
+ Map-reduce partition columns: value (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: key (type: string)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ PTF Operator
+ Function definitions:
+ Input definition
+ input alias: ptf_0
+ output shape: _col0: string, _col1: string
+ type: WINDOWING
+ Windowing table definition
+ input alias: ptf_1
+ name: windowingtablefunction
+ order by: _col1 ASC NULLS FIRST
+ partition by: _col1
+ raw input shape:
+ window functions:
+ window function definition
+ alias: count_window_0
+ arguments: 1
+ name: count
+ window function: GenericUDAFCountEvaluator
+ window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX)
+ Statistics: Num rows: 25 Data size: 11075 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: string), _col0 (type: string), count_window_0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value, key, count(1) over (partition by value) from src1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select value, key, count(1) over (partition by value) from src1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+ 7
+ 7
+ 7
+ 7
+ 128 7
+ 369 7
+ 224 7
+val_146 146 1
+val_150 150 1
+val_165 1
+val_193 1
+val_213 213 1
+val_238 238 1
+val_255 255 1
+val_265 1
+val_27 1
+val_273 273 1
+val_278 278 1
+val_311 311 1
+val_401 401 1
+val_406 406 1
+val_409 1
+val_484 1
+val_66 66 1
+val_98 98 1
+PREHOOK: query: explain select distinct count(*)+key from src1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(*)+key from src1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: key (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (UDFToDouble(_col1) + UDFToDouble(_col0)) (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: double)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: double)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(*)+key from src1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(*)+key from src1 group by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+NULL
+67.0
+99.0
+129.0
+147.0
+151.0
+214.0
+225.0
+239.0
+256.0
+274.0
+279.0
+312.0
+370.0
+402.0
+407.0
+PREHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-3 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: b
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 39 Data size: 10413 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count(_col1), count(_col3)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0), count(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: bigint), _col2 (type: bigint)
+ outputColumnNames: _col1, _col2
+ Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col1 (type: bigint), _col2 (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: bigint), KEY._col1 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct count(a.value), count(b.value) from src a join src1 b on a.key=b.key group by a.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+1 1
+2 2
+3 3
+4 4
+5 5
+PREHOOK: query: explain select distinct * from (select distinct * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: explain select distinct * from (select distinct * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src1
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: key (type: string), value (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 12 Data size: 2100 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select distinct * from (select distinct * from src1) as T
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select distinct * from (select distinct * from src1) as T
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+
+ val_165
+ val_193
+ val_265
+ val_27
+ val_409
+ val_484
+128
+146 val_146
+150 val_150
+213 val_213
+224
+238 val_238
+255 val_255
+273 val_273
+278 val_278
+311 val_311
+369
+401 val_401
+406 val_406
+66 val_66
+98 val_98
diff --git a/ql/src/test/results/compiler/errors/wrong_distinct1.q.out b/ql/src/test/results/compiler/errors/wrong_distinct1.q.out
deleted file mode 100644
index de81b5b..0000000
--- a/ql/src/test/results/compiler/errors/wrong_distinct1.q.out
+++ /dev/null
@@ -1,2 +0,0 @@
-Semantic Exception:
-3:88 SELECT DISTINCT and GROUP BY can not be in the same query. Error encountered near token 'key'
\ No newline at end of file