diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
index feb8558..909be09 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
@@ -58,6 +58,8 @@
 
   private boolean isHiddenVirtualCol;
 
+  private boolean isTransientCol;
+
   private String typeName;
 
   public ColumnInfo() {
@@ -151,6 +153,14 @@ public boolean isHiddenVirtualCol() {
     return isHiddenVirtualCol;
   }
 
+  public boolean isTransientCol() {
+    return isTransientCol;
+  }
+
+  public void setTransientCol(boolean transientCol) {
+    isTransientCol = transientCol;
+  }
+
   /**
    * Returns the string representation of the ColumnInfo.
    */
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index cb284d7..934e953 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -24,6 +24,7 @@
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -2380,9 +2381,9 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input,
   }
 
   @SuppressWarnings("nls")
-  private Integer genColListRegex(String colRegex, String tabAlias,
+  private int genColListRegex(String colRegex, String tabAlias,
       ASTNode sel, ArrayList<ExprNodeDesc> col_list,
-      RowResolver input, Integer pos, RowResolver output, List<String> aliases, boolean subQuery)
+      RowResolver input, int pos, RowResolver output, List<String> aliases, boolean subQuery)
       throws SemanticException {
 
     // The table alias should exist
@@ -2449,13 +2450,12 @@
            name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
            colInfo.isSkewedCol());
        col_list.add(expr);
-        oColInfo = new ColumnInfo(getColumnInternalName(pos),
+        oColInfo = new ColumnInfo(getColumnInternalName(pos++),
            colInfo.getType(), colInfo.getTabAlias(),
            colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
        inputColsProcessed.put(colInfo, oColInfo);
      }
      output.put(tmp[0], tmp[1], oColInfo);
-      pos = Integer.valueOf(pos.intValue() + 1);
      matched++;
 
      if (unparseTranslator.isEnabled()) {
@@ -3035,9 +3035,10 @@ private static boolean isRegex(String pattern, HiveConf conf) {
   private Operator genSelectPlan(String dest, QB qb, Operator input)
       throws SemanticException {
-    ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
-
-    Operator op = genSelectPlan(selExprList, qb, input, false);
+    QBParseInfo qbp = qb.getParseInfo();
+    ASTNode selExprList = qbp.getSelForClause(dest);
+    List<ASTNode> rsKeys = collectKeys(dest, qbp);
+    Operator op = genSelectPlan(selExprList, rsKeys, qb, input, false);
 
     if (LOG.isDebugEnabled()) {
       LOG.debug("Created Select Plan for clause: " + dest);
     }
@@ -3046,8 +3047,38 @@
     return op;
   }
 
+  // extract references from order-by/sort-by/dist-by/cluster-by
+  private List<ASTNode> collectKeys(String dest, QBParseInfo qbp) {
+    List<ASTNode> keys = new ArrayList<ASTNode>();
+    if (qbp.getClusterByForClause(dest) != null) {
+      for (Object node : qbp.getClusterByForClause(dest).getChildren()) {
+        ASTNode dummy = new ASTNode();
+        dummy.addChild((ASTNode)node);
+        keys.add(dummy);
+      }
+    }
+    if (qbp.getDistributeByForClause(dest) != null) {
+      for (Object node : qbp.getDistributeByForClause(dest).getChildren()) {
+        ASTNode dummy = new ASTNode();
+        dummy.addChild((ASTNode)node);
+        keys.add(dummy);
+      }
+    }
+    if (qbp.getOrderByForClause(dest) != null) {
+      for (Object node : qbp.getOrderByForClause(dest).getChildren()) {
+        keys.add((ASTNode) node);
+      }
+    }
+    if (qbp.getSortByForClause(dest) != null) {
+      for (Object node : qbp.getSortByForClause(dest).getChildren()) {
+        keys.add((ASTNode) node);
+      }
+    }
+    return keys;
+  }
+
   @SuppressWarnings("nls")
-  private Operator genSelectPlan(ASTNode selExprList, QB qb,
+  private Operator genSelectPlan(ASTNode selExprList, List<ASTNode> rsKeys, QB qb,
       Operator input, boolean outerLV) throws SemanticException {
 
     if (LOG.isDebugEnabled()) {
@@ -3057,7 +3088,7 @@
     ArrayList<ExprNodeDesc> col_list = new ArrayList<ExprNodeDesc>();
     RowResolver out_rwsch = new RowResolver();
     ASTNode trfm = null;
-    Integer pos = Integer.valueOf(0);
+    int pos = 0;
     RowResolver inputRR = opParseCtx.get(input).getRowResolver();
     // SELECT * or SELECT TRANSFORM(*)
     boolean selectStar = false;
@@ -3104,7 +3135,7 @@
             .getChild(0));
       }
       if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) {
-        genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0),
+        pos = genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0),
             col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery);
       }
     }
@@ -3267,7 +3298,7 @@
           out_rwsch.checkColumn(tabAlias, colAlias);
         }
 
-        ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos),
+        ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos++),
             exp.getWritableObjectInspector(), tabAlias, false);
         colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp)
             .isSkewedCol() : false);
@@ -3280,12 +3311,34 @@
           out_rwsch.put(altMapping[0], altMapping[1], colInfo);
         }
       }
-
-        pos = Integer.valueOf(pos.intValue() + 1);
       }
     }
     selectStar = selectStar && exprList.getChildCount() == posn + 1;
 
+    if (!isInTransform && !selectStar && !rsKeys.isEmpty()) {
+      TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
+      for (ASTNode rsKey : rsKeys) {
+        ExprNodeDesc expr = genExprNodeDesc((ASTNode) rsKey.getChild(0), inputRR, tcCtx);
+        for (ExprNodeColumnDesc columnExpr : ExprNodeDescUtils.extractColumns(expr)) {
+          if (ExprNodeDescUtils.indexOf(columnExpr, col_list) >= 0) {
+            continue; // referenced column in select
+          }
+          String tabAlias = columnExpr.getTabAlias();
+          if (!inputRR.getTableNames().contains(tabAlias)) {
+            continue;
+          }
+          String colAlias = columnExpr.getColumn();
+          if (out_rwsch.get(null, colAlias) == null) {
+            String colName = getColumnInternalName(pos++);
+            ColumnInfo colInfo = new ColumnInfo(colName, columnExpr.getTypeInfo(), tabAlias, false);
+            colInfo.setTransientCol(true);
+            out_rwsch.put(tabAlias, colAlias, colInfo);
+            col_list.add(columnExpr);
+          }
+        }
+      }
+    }
+
     ArrayList<String> columnNames = new ArrayList<String>();
     Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
     for (int i = 0; i < col_list.size(); i++) {
@@ -6596,6 +6649,9 @@ private Operator genReduceSinkPlan(String dest, QB qb, Operator input,
 
     for (int i = 0; i < index.length; i++) {
       ColumnInfo prev = columnInfos.get(i);
+      if (prev.isTransientCol()) {
+        continue;
+      }
       String[] nm = inputRR.reverseLookup(prev.getInternalName());
       String[] nm2 = inputRR.getAlternateMappings(prev.getInternalName());
       ColumnInfo info = new ColumnInfo(prev);
@@ -9274,7 +9330,7 @@ private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree)
     // Get the UDTF Path
     QB blankQb = new QB(null, null, false);
     Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree
-        .getChild(0), blankQb, lvForward,
+        .getChild(0), Collections.<ASTNode>emptyList(), blankQb, lvForward,
         lateralViewTree.getType() == HiveParser.TOK_LATERAL_VIEW_OUTER);
     // add udtf aliases to QB
     for (String udtfAlias : blankQb.getAliases()) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index f293c43..23cb4bb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -37,7 +37,7 @@ public class ExprNodeDescUtils {
 
-  public static int indexOf(ExprNodeDesc origin, List<ExprNodeDesc> sources) {
+  public static int indexOf(ExprNodeDesc origin, List<? extends ExprNodeDesc> sources) {
     for (int i = 0; i < sources.size(); i++) {
       if (origin.isSame(sources.get(i))) {
         return i;
       }
     }
@@ -373,4 +373,25 @@ private static ExprNodeConstantDesc foldConstant(ExprNodeGenericFuncDesc func) {
       return null;
     }
   }
+
+  public static List<ExprNodeColumnDesc> extractColumns(ExprNodeDesc expr) {
+    return extractColumns(expr, new ArrayList<ExprNodeColumnDesc>());
+  }
+
+  private static List<ExprNodeColumnDesc> extractColumns(ExprNodeDesc expr,
+      List<ExprNodeColumnDesc> extracted) {
+    if (expr instanceof ExprNodeColumnDesc) {
+      if (indexOf(expr, extracted) < 0) {
+        extracted.add((ExprNodeColumnDesc) expr);
+      }
+      return extracted;
+    }
+    if (expr.getChildren() != null) {
+      for (ExprNodeDesc child : expr.getChildren()) {
+        extractColumns(child, extracted);
+      }
+    }
+    return extracted;
+  }
+
 }
diff --git ql/src/test/queries/clientpositive/keys_not_in_select.q ql/src/test/queries/clientpositive/keys_not_in_select.q
new file mode 100644
index 0000000..ec67868
--- /dev/null
+++ ql/src/test/queries/clientpositive/keys_not_in_select.q
@@ -0,0 +1,7 @@
+EXPLAIN
+SELECT value FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key;
+SELECT value FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key;
+
+EXPLAIN
+SELECT key+1 FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key;
+SELECT key+1 FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key;
diff --git ql/src/test/results/clientpositive/keys_not_in_select.q.out ql/src/test/results/clientpositive/keys_not_in_select.q.out
new file mode 100644
index 0000000..742b3b3
--- /dev/null
+++ ql/src/test/results/clientpositive/keys_not_in_select.q.out
@@ -0,0 +1,148 @@
+PREHOOK: query: EXPLAIN
+SELECT value FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT value FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Row Limit Per Split: 20
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string), key (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string)
+          outputColumnNames: _col0
+          Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT value FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT value FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_128
+val_150
+val_165
+val_193
+val_213
+val_224
+val_238
+val_255
+val_265
+val_27
+val_273
+val_278
+val_311
+val_369
+val_401
+val_409
+val_484
+val_66
+val_86
+val_98
+PREHOOK: query: EXPLAIN
+SELECT key+1 FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key+1 FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Row Limit Per Split: 20
+            Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: (key + 1) (type: double), key (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: string)
+                sort order: +
+                Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: double)
+          outputColumnNames: _col0
+          Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT key+1 FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key+1 FROM SRC TABLESAMPLE (20 ROWS) ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+129.0
+151.0
+166.0
+194.0
+214.0
+225.0
+239.0
+256.0
+266.0
+28.0
+274.0
+279.0
+312.0
+370.0
+402.0
+410.0
+485.0
+67.0
+87.0
+99.0
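
Reviewer note: the core helper of this patch, ExprNodeDescUtils.extractColumns, does a pre-order walk of an expression tree and collects each distinct column reference once, deduplicating with isSame() (structural equality via indexOf) rather than object identity. Below is a minimal, self-contained sketch of that pattern for illustration only; Expr, ColumnExpr, and ExtractColumnsSketch are invented stand-ins, not Hive classes, and the code is not part of the patch.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Stand-ins for Hive's ExprNodeDesc / ExprNodeColumnDesc (illustration only).
class Expr {
  final List<Expr> children;
  Expr(Expr... children) { this.children = Arrays.asList(children); }
  boolean isSame(Expr other) { return this == other; }
}

class ColumnExpr extends Expr {
  final String column;
  ColumnExpr(String column) { this.column = column; }
  @Override
  boolean isSame(Expr other) {
    // structural equality, in the spirit of ExprNodeDesc.isSame()
    return other instanceof ColumnExpr && ((ColumnExpr) other).column.equals(column);
  }
}

public class ExtractColumnsSketch {

  // Pre-order walk collecting each distinct column reference exactly once.
  static List<ColumnExpr> extractColumns(Expr expr, List<ColumnExpr> extracted) {
    if (expr instanceof ColumnExpr) {
      boolean seen = false;
      for (ColumnExpr prev : extracted) {
        if (prev.isSame(expr)) {   // dedupe by value, not identity
          seen = true;
          break;
        }
      }
      if (!seen) {
        extracted.add((ColumnExpr) expr);
      }
      return extracted;
    }
    for (Expr child : expr.children) {
      extractColumns(child, extracted);
    }
    return extracted;
  }

  public static void main(String[] args) {
    // "key" appears in two subtrees but is collected only once, mirroring how
    // SELECT key+1 ... ORDER BY key produces a single transient key column.
    Expr keyPlusOne = new Expr(new ColumnExpr("key"));
    Expr tree = new Expr(keyPlusOne, new ColumnExpr("key"));
    List<ColumnExpr> cols = extractColumns(tree, new ArrayList<ColumnExpr>());
    System.out.println(cols.size()); // prints 1
  }
}

The deduplication matters because genSelectPlan adds one transient ColumnInfo per distinct referenced column; without it, a key referenced in several clauses would be appended to col_list more than once.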