diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
index feb8558..9114365 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
@@ -22,7 +22,6 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
@@ -68,13 +67,6 @@ public ColumnInfo(String internalName, TypeInfo type, String tabAlias,
     this(internalName, type, tabAlias, isVirtualCol, false);
   }
 
-  public ColumnInfo(String internalName, Class type, String tabAlias,
-      boolean isVirtualCol) {
-    this(internalName, TypeInfoFactory
-        .getPrimitiveTypeInfoFromPrimitiveWritable(type), tabAlias,
-        isVirtualCol, false);
-  }
-
   public ColumnInfo(String internalName, TypeInfo type, String tabAlias,
       boolean isVirtualCol, boolean isHiddenVirtualCol) {
     this(internalName,
@@ -84,9 +76,8 @@ public ColumnInfo(String internalName, TypeInfo type, String tabAlias,
         isHiddenVirtualCol);
   }
 
-  public ColumnInfo(String internalName, ObjectInspector objectInspector,
-      String tabAlias, boolean isVirtualCol) {
-    this(internalName, objectInspector, tabAlias, isVirtualCol, false);
+  public ColumnInfo(String internalName, ObjectInspector objectInspector, String tabAlias) {
+    this(internalName, objectInspector, tabAlias, false, false);
   }
 
   public ColumnInfo(String internalName, ObjectInspector objectInspector,
@@ -151,6 +142,10 @@ public boolean isHiddenVirtualCol() {
     return isHiddenVirtualCol;
   }
 
+  public boolean isAnyVirtualColumn() {
+    return isVirtualCol || isHiddenVirtualCol;
+  }
+
   /**
    * Returns the string representation of the ColumnInfo.
    */
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
index f448b16..41e495b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
@@ -58,6 +58,8 @@ tableOrColumn
 @after { gParent.popMsg(state); }
     : identifier -> ^(TOK_TABLE_OR_COL identifier)
+    |
+    DOLLAR Number -> ^(TOK_TABLE_OR_COL Number)
     ;
 
 expressionList
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 9c001c1..8dfbd1d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -273,7 +273,7 @@ atomExpression
 
 precedenceFieldExpression
     :
-    atomExpression ((LSQUARE^ expression RSQUARE!) | (DOT^ identifier))*
+    atomExpression ((LSQUARE^ expression RSQUARE!) | (DOT^ DOLLAR! Number) | (DOT^ identifier))*
     ;
 
 precedenceUnaryOperator
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1d8d764..e3e4a00 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3269,7 +3269,7 @@ private static boolean isRegex(String pattern, HiveConf conf) {
       }
 
       ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos),
-          exp.getWritableObjectInspector(), tabAlias, false);
+          exp.getWritableObjectInspector(), tabAlias);
       colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc) exp)
           .isSkewedCol() : false);
       out_rwsch.put(tabAlias, colAlias, colInfo);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index e7da289..ad9fe61 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -485,8 +485,22 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       String tableOrCol = BaseSemanticAnalyzer.unescapeIdentifier(expr
           .getChild(0).getText());
 
-      boolean isTableAlias = input.hasTableAlias(tableOrCol);
-      ColumnInfo colInfo = input.get(null, tableOrCol);
+      ColumnInfo colInfo;
+      boolean isTableAlias = false;
+      if (expr.getChild(0).getType() == HiveParser.Number) {
+        if (parent != null && parent.getType() == HiveParser.DOT) {
+          // aliased index.. resolve later
+          return null;
+        }
+        colInfo = getColumn((ASTNode) expr.getChild(0), input);
+        if (colInfo == null) {
+          ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(0)), expr);
+          return null;
+        }
+      } else {
+        isTableAlias = input.hasTableAlias(tableOrCol);
+        colInfo = input.get(null, tableOrCol);
+      }
 
       if (isTableAlias) {
         if (colInfo != null) {
@@ -1064,8 +1078,17 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
             .getChild(0).getChild(0).getText());
         // NOTE: tableAlias must be a valid non-ambiguous table alias,
         // because we've checked that in TOK_TABLE_OR_COL's process method.
-        ColumnInfo colInfo = input.get(tableAlias,
+        ColumnInfo colInfo;
+        if (expr.getChild(1).getType() == HiveParser.Number) {
+          colInfo = getColumn((ASTNode) expr.getChild(1), input, tableAlias);
+          if (colInfo == null) {
+            ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
+            return null;
+          }
+        } else {
+          colInfo = input.get(tableAlias,
             ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString());
+        }
 
         if (colInfo == null) {
           ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
@@ -1145,6 +1168,32 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
   }
 
+  // zero based index
+  private static ColumnInfo getColumn(ASTNode expr, RowResolver input) throws SemanticException {
+    if (input.getTableNames().size() > 1) {
+      throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(expr));
+    }
+    int index = Integer.valueOf(expr.getText());
+    List<ColumnInfo> columns = input.getRowSchema().getSignature();
+    if (index < columns.size()) {
+      ColumnInfo column = columns.get(index);
+      return column.isAnyVirtualColumn() ? null : column;
+    }
+    return null;
+  }
+
+  // zero based index
+  private static ColumnInfo getColumn(ASTNode expr, RowResolver input, String tableAlias) {
+    int index = Integer.valueOf(expr.getText());
+    HashMap<String, ColumnInfo> fieldMap = input.getFieldMap(tableAlias);
+    if (fieldMap != null && index < fieldMap.size()) {
+      List<ColumnInfo> columns = new ArrayList<ColumnInfo>(fieldMap.values());
+      ColumnInfo column = columns.get(index);
+      return column.isAnyVirtualColumn() ? null : column;
+    }
+    return null;
+  }
+
   /**
    * Factory method to get DefaultExprProcessor.
    *
diff --git ql/src/test/queries/clientnegative/select_by_column_index_negative0.q ql/src/test/queries/clientnegative/select_by_column_index_negative0.q
new file mode 100644
index 0000000..380f26b
--- /dev/null
+++ ql/src/test/queries/clientnegative/select_by_column_index_negative0.q
@@ -0,0 +1 @@
+explain select $0, $1, $2 from src;
diff --git ql/src/test/queries/clientnegative/select_by_column_index_negative1.q ql/src/test/queries/clientnegative/select_by_column_index_negative1.q
new file mode 100644
index 0000000..f436589
--- /dev/null
+++ ql/src/test/queries/clientnegative/select_by_column_index_negative1.q
@@ -0,0 +1,2 @@
+explain select a.$0 from src a join src1 b on $0=b.$0;
+
diff --git ql/src/test/queries/clientnegative/select_by_column_index_negative2.q ql/src/test/queries/clientnegative/select_by_column_index_negative2.q
new file mode 100644
index 0000000..15d5597
--- /dev/null
+++ ql/src/test/queries/clientnegative/select_by_column_index_negative2.q
@@ -0,0 +1,2 @@
+explain select $0 from src a join src1 b on a.$0=b.$0;
+
diff --git ql/src/test/queries/clientpositive/select_by_column_index.q ql/src/test/queries/clientpositive/select_by_column_index.q
new file mode 100644
index 0000000..e6f8c1a
--- /dev/null
+++ ql/src/test/queries/clientpositive/select_by_column_index.q
@@ -0,0 +1,13 @@
+-- SORT_QUERY_RESULTS
+
+explain
+select $0, $1 from src TABLESAMPLE(10 ROWS) order by $0;
+select $0, $1 from src TABLESAMPLE(10 ROWS) order by $0;
+
+explain
+select $1, sum($0) from src TABLESAMPLE(10 ROWS) group by $1;
+select $1, sum($0) from src TABLESAMPLE(10 ROWS) group by $1;
+
+explain
+select a.$0, a.$1, b.$1 from src a join src1 b on a.$0=b.$0;
+select a.$0, a.$1, b.$1 from src a join src1 b on a.$0=b.$0;
diff --git ql/src/test/results/clientnegative/select_by_column_index_negative0.q.out ql/src/test/results/clientnegative/select_by_column_index_negative0.q.out
new file mode 100644
index 0000000..f60e4b1
--- /dev/null
+++ ql/src/test/results/clientnegative/select_by_column_index_negative0.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10002]: Line 1:24 Invalid column reference '2'
diff --git ql/src/test/results/clientnegative/select_by_column_index_negative1.q.out ql/src/test/results/clientnegative/select_by_column_index_negative1.q.out
new file mode 100644
index 0000000..bb82f1d
--- /dev/null
+++ ql/src/test/results/clientnegative/select_by_column_index_negative1.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10009]: Line 1:47 Invalid table alias '0'
diff --git ql/src/test/results/clientnegative/select_by_column_index_negative2.q.out ql/src/test/results/clientnegative/select_by_column_index_negative2.q.out
new file mode 100644
index 0000000..acd7133
--- /dev/null
+++ ql/src/test/results/clientnegative/select_by_column_index_negative2.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10007]: Line 1:16 Ambiguous column reference '0'
diff --git ql/src/test/results/clientpositive/select_by_column_index.q.out ql/src/test/results/clientpositive/select_by_column_index.q.out
new file mode 100644
index 0000000..aae3083
--- /dev/null
+++ ql/src/test/results/clientpositive/select_by_column_index.q.out
@@ -0,0 +1,257 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+explain
+select $0, $1 from src TABLESAMPLE(10 ROWS) order by $0
+PREHOOK: type: QUERY
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+explain
+select $0, $1 from src TABLESAMPLE(10 ROWS) order by $0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Row Limit Per Split: 10
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col0 (type: string)
+                sort order: +
+                Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select $0, $1 from src TABLESAMPLE(10 ROWS) order by $0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select $0, $1 from src TABLESAMPLE(10 ROWS) order by $0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+165	val_165
+238	val_238
+255	val_255
+27	val_27
+278	val_278
+311	val_311
+409	val_409
+484	val_484
+86	val_86
+98	val_98
+PREHOOK: query: explain
+select $1, sum($0) from src TABLESAMPLE(10 ROWS) group by $1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select $1, sum($0) from src TABLESAMPLE(10 ROWS) group by $1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Row Limit Per Split: 10
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string), key (type: string)
+              outputColumnNames: value, key
+              Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(key)
+                keys: value (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: double)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: double)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select $1, sum($0) from src TABLESAMPLE(10 ROWS) group by $1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select $1, sum($0) from src TABLESAMPLE(10 ROWS) group by $1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_165	165.0
+val_238	238.0
+val_255	255.0
+val_27	27.0
+val_278	278.0
+val_311	311.0
+val_409	409.0
+val_484	484.0
+val_86	86.0
+val_98	98.0
+PREHOOK: query: explain
+select a.$0, a.$1, b.$1 from src a join src1 b on a.$0=b.$0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.$0, a.$1, b.$1 from src a join src1 b on a.$0=b.$0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 1 Data size: 216 Basic stats: COMPLETE Column stats: NONE
+                value expressions: value (type: string)
+          TableScan
+            alias: a
+            Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: key is not null (type: boolean)
+              Statistics: Num rows: 15 Data size: 3006 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 15 Data size: 3006 Basic stats: COMPLETE Column stats: NONE
+                value expressions: value (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {KEY.reducesinkkey0} {VALUE._col0}
+            1 {VALUE._col0}
+          outputColumnNames: _col0, _col1, _col5
+          Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 16 Data size: 3306 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select a.$0, a.$1, b.$1 from src a join src1 b on a.$0=b.$0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: select a.$0, a.$1, b.$1 from src a join src1 b on a.$0=b.$0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+128	val_128	
+128	val_128	
+128	val_128	
+146	val_146	val_146
+146	val_146	val_146
+150	val_150	val_150
+213	val_213	val_213
+213	val_213	val_213
+224	val_224	
+224	val_224	
+238	val_238	val_238
+238	val_238	val_238
+255	val_255	val_255
+255	val_255	val_255
+273	val_273	val_273
+273	val_273	val_273
+273	val_273	val_273
+278	val_278	val_278
+278	val_278	val_278
+311	val_311	val_311
+311	val_311	val_311
+311	val_311	val_311
+369	val_369	
+369	val_369	
+369	val_369	
+401	val_401	val_401
+401	val_401	val_401
+401	val_401	val_401
+401	val_401	val_401
+401	val_401	val_401
+406	val_406	val_406
+406	val_406	val_406
+406	val_406	val_406
+406	val_406	val_406
+66	val_66	val_66
+98	val_98	val_98
+98	val_98	val_98
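
Editor's addendum (not part of the patch): a minimal, standalone sketch of the resolution rule the new getColumn() helpers implement, runnable without a Hive checkout. SimpleColumn and resolveByIndex are illustrative stand-ins for ColumnInfo and the patched lookup, not Hive APIs. The rule: $n is a zero-based position into the row schema, and an out-of-range index or a (hidden) virtual column resolves to null, which the patch surfaces as "Invalid column reference".

import java.util.Arrays;
import java.util.List;

public class ColumnIndexSketch {
  // Stand-in for ColumnInfo: a name plus the two virtual-column flags
  // that the new ColumnInfo.isAnyVirtualColumn() folds together.
  static final class SimpleColumn {
    final String name;
    final boolean virtualCol;
    final boolean hiddenVirtualCol;

    SimpleColumn(String name, boolean virtualCol, boolean hiddenVirtualCol) {
      this.name = name;
      this.virtualCol = virtualCol;
      this.hiddenVirtualCol = hiddenVirtualCol;
    }

    boolean isAnyVirtualColumn() {
      return virtualCol || hiddenVirtualCol;
    }
  }

  // Zero-based positional lookup, mirroring getColumn(): $index is resolved
  // against the row schema; virtual columns are never addressable by index.
  static SimpleColumn resolveByIndex(List<SimpleColumn> schema, int index) {
    if (index < 0 || index >= schema.size()) {
      return null; // out of range -> "Invalid column reference"
    }
    SimpleColumn column = schema.get(index);
    return column.isAnyVirtualColumn() ? null : column;
  }

  public static void main(String[] args) {
    // src has two columns, key and value, so $0 -> key and $1 -> value.
    List<SimpleColumn> src = Arrays.asList(
        new SimpleColumn("key", false, false),
        new SimpleColumn("value", false, false));
    System.out.println(resolveByIndex(src, 1).name); // value
    System.out.println(resolveByIndex(src, 2));      // null: $2 fails, as in negative0.q
  }
}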