diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 5a72af1..e0d35d3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -644,8 +644,8 @@ private boolean isField(String col) {
    */
   public List<FieldSchema> getAllCols() {
     ArrayList<FieldSchema> f_list = new ArrayList<FieldSchema>();
-    f_list.addAll(getPartCols());
     f_list.addAll(getCols());
+    f_list.addAll(getPartCols());
     return f_list;
   }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
index f04b493..c425770 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
@@ -28,7 +28,12 @@
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.antlr.runtime.CommonToken;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
 
@@ -406,4 +411,48 @@ void addEncryptedTargetTablePath(Path p) {
     }
     return encryptedTargetTablePaths;
   }
+
+  public ASTNode rewriteStarToSelectColumn(ASTNode ast, Hive db) throws SemanticException {
+    // only rewrite when the query block reads from a single table
+    List<String> tableNames = new ArrayList<>();
+    tableNames.addAll(aliasToTabs.values());
+    String tableName = null;
+    if (tableNames.size() != 1) {
+      return ast;
+    }
+    tableName = tableNames.iterator().next();
+    Table table = null;
+    try {
+      table = db.getTable(tableName);
+    } catch (HiveException ex) {
+      // table not found, e.g. a CTE or a bad table name; leave the AST unchanged
+      LOG.warn("rewriteStarToSelectColumn threw an exception: " + ex.toString() + ". Skipping the rewrite.");
+      return ast;
+    }
+    ASTNode selectNode = (ASTNode) ast.dupNode();
+    for (Node child : ast.getChildren()) {
+      // child is TOK_SELEXPR
+      ASTNode node = (ASTNode) child.getChildren().get(0);
+      // check whether this select expression is a star (select (distinct) *)
+      if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+        List<FieldSchema> cols = table.getAllCols();
+        for (FieldSchema fs : cols) {
+          // unfold the star: add one TOK_SELEXPR subtree per column of the table
+          selectNode.addChild(buildSelExprSubTree(fs.getName()));
+        }
+      } else {
+        selectNode.addChild((ASTNode) child);
+      }
+    }
+    return selectNode;
+  }
+
+  private ASTNode buildSelExprSubTree(String col) {
+    ASTNode selexpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"));
+    ASTNode tableOrCol = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL,
+        "TOK_TABLE_OR_COL"));
+    tableOrCol.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, col)));
+    selexpr.addChild(tableOrCol);
+    return selexpr;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 7d2595d..867da45 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1331,6 +1331,10 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
       // fall through
     case HiveParser.TOK_SELECT:
       qb.countSel();
+      // if the unparse translator is enabled we are creating a view, so keep the original star
+      if (!unparseTranslator.isEnabled()) {
+        ast = qb.rewriteStarToSelectColumn(ast, db);
+      }
       qbp.setSelExprForClause(ctx_1.dest, ast);
 
       int posn = 0;
@@ -12514,7 +12518,7 @@ public static ASTNode genSelectDIAST(RowResolver rr) {
     }
     return selectDI;
   }
-  private static ASTNode buildSelExprSubTree(String tableAlias, String col) {
+  static ASTNode buildSelExprSubTree(String tableAlias, String col) {
     ASTNode selexpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"));
     ASTNode tableOrCol = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL,
         "TOK_TABLE_OR_COL"));
diff --git a/ql/src/test/queries/clientpositive/unfold_star_qb.q b/ql/src/test/queries/clientpositive/unfold_star_qb.q
new file mode 100644
index 0000000..bd15846
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/unfold_star_qb.q
@@ -0,0 +1,23 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+explain create view s as select * from src order by key limit 2;
+
+explain create view sdi as select distinct * from src order by key limit 2;
+
+from src a select distinct a.* where a.key = '238';
+
+explain extended select * from (select * from src)subq limit 10;
+
+select * from (select * from src)subq limit 10;
+
+create view v as select * from src;
+
+select * from (select * from v)subq limit 10;
+
+select * from (select * from src union select * from v)subq limit 10;
+
+create view v2 as select * from (select * from src union select * from v)subq;
+
+select * from v2 limit 10;
+
diff --git a/ql/src/test/results/clientpositive/unfold_star_qb.q.out b/ql/src/test/results/clientpositive/unfold_star_qb.q.out
new file mode 100644
index 0000000..d781def
--- /dev/null
+++ b/ql/src/test/results/clientpositive/unfold_star_qb.q.out
@@ -0,0 +1,203 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+explain create view s as select * from src order by key limit 2
+PREHOOK: type: CREATEVIEW
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+explain create view s as select * from src order by key limit 2
+POSTHOOK: type: CREATEVIEW
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Create View Operator:
+      Create View
+        or replace: false
+        columns: key string, value string
+        expanded text: select `src`.`key`, `src`.`value` from `default`.`src` order by `src`.`key` limit 2
+        name: default.s
+        original text: select * from src order by key limit 2
+
+PREHOOK: query: explain create view sdi as select distinct * from src order by key limit 2
+PREHOOK: type: CREATEVIEW
+POSTHOOK: query: explain create view sdi as select distinct * from src order by key limit 2
+POSTHOOK: type: CREATEVIEW
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Create View Operator:
+      Create View
+        or replace: false
+        columns: key string, value string
+        expanded text: select distinct `src`.`key`, `src`.`value` from `default`.`src` order by `src`.`key` limit 2
+        name: default.sdi
+        original text: select distinct * from src order by key limit 2
+
+PREHOOK: query: from src a select distinct a.* where a.key = '238'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: from src a select distinct a.* where a.key = '238'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238	val_238
+PREHOOK: query: explain extended select * from (select * from src)subq limit 10
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select * from (select * from src)subq limit 10
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_SUBQUERY
+         TOK_QUERY
+            TOK_FROM
+               TOK_TABREF
+                  TOK_TABNAME
+                     src
+            TOK_INSERT
+               TOK_DESTINATION
+                  TOK_DIR
+                     TOK_TMP_FILE
+               TOK_SELECT
+                  TOK_SELEXPR
+                     TOK_ALLCOLREF
+         subq
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_LIMIT
+         10
+
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+      Processor Tree:
+        TableScan
+          alias: src
+          Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          GatherStats: false
+          Select Operator
+            expressions: key (type: string), value (type: string)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Limit
+              Number of rows: 10
+              Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+              ListSink
+
+PREHOOK: query: select * from (select * from src)subq limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (select * from src)subq limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+165	val_165
+238	val_238
+255	val_255
+27	val_27
+278	val_278
+311	val_311
+409	val_409
+484	val_484
+86	val_86
+98	val_98
+PREHOOK: query: create view v as select * from src
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v
+POSTHOOK: query: create view v as select * from src
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v
+PREHOOK: query: select * from (select * from v)subq limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@v
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (select * from v)subq limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@v
+#### A masked pattern was here ####
+165	val_165
+238	val_238
+255	val_255
+27	val_27
+278	val_278
+311	val_311
+409	val_409
+484	val_484
+86	val_86
+98	val_98
+PREHOOK: query: select * from (select * from src union select * from v)subq limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@v
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (select * from src union select * from v)subq limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@v
+#### A masked pattern was here ####
+0	val_0
+10	val_10
+100	val_100
+103	val_103
+104	val_104
+105	val_105
+11	val_11
+111	val_111
+113	val_113
+114	val_114
+PREHOOK: query: create view v2 as select * from (select * from src union select * from v)subq
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Input: default@v
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v2
+POSTHOOK: query: create view v2 as select * from (select * from src union select * from v)subq
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@v
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v2
+PREHOOK: query: select * from v2 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@v
+PREHOOK: Input: default@v2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from v2 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@v
+POSTHOOK: Input: default@v2
+#### A masked pattern was here ####
+0	val_0
+10	val_10
+100	val_100
+103	val_103
+104	val_104
+105	val_105
+11	val_11
+111	val_111
+113	val_113
+114	val_114
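
Note (illustration only, not part of the patch): each unfolded column is represented by a (TOK_SELEXPR (TOK_TABLE_OR_COL <col>)) subtree, built by the new private QB#buildSelExprSubTree helper. The standalone sketch below shows that shape using the same Hive/ANTLR APIs the patch uses; it assumes the hive-exec (ql) jar and the ANTLR 3 runtime are on the classpath, and the class and method names (UnfoldStarSketch, selExprFor) are made up for illustration.

// Illustration only -- mirrors QB#buildSelExprSubTree to show the subtree shape
// produced when "select *" is unfolded into explicit columns.
import org.antlr.runtime.CommonToken;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.HiveParser;

public class UnfoldStarSketch {

  // Builds (TOK_SELEXPR (TOK_TABLE_OR_COL <col>)), the same shape as the new QB helper.
  static ASTNode selExprFor(String col) {
    ASTNode selexpr = new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"));
    ASTNode tableOrCol = new ASTNode(new CommonToken(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"));
    tableOrCol.addChild(new ASTNode(new CommonToken(HiveParser.Identifier, col)));
    selexpr.addChild(tableOrCol);
    return selexpr;
  }

  public static void main(String[] args) {
    // For src(key, value) the star unfolds into one TOK_SELEXPR per column under TOK_SELECT.
    ASTNode select = new ASTNode(new CommonToken(HiveParser.TOK_SELECT, "TOK_SELECT"));
    select.addChild(selExprFor("key"));
    select.addChild(selExprFor("value"));
    // Prints roughly:
    // (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)))
    System.out.println(select.toStringTree());
  }
}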