diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java index bda3c21c7e..895c2f2ebc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java @@ -152,6 +152,17 @@ public Object dupNode(Object t) { return create(((CommonTree)t).token); }; + @Override + public Object dupTree(Object t, Object parent) { + // Overridden to also copy the token start index / stop index onto the duplicated node; these are needed through optimization, + // e.g., for masking/filtering. NOTE(review): t is cast and dereferenced without a null check - confirm callers never pass null. + ASTNode astNode = (ASTNode) t; + ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent); + astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex()); + astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex()); + return astNodeCopy; + } + @Override public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) { return new ASTErrorNode(input, start, stop, e); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index 64b354191e..a2f6fbbcaa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -423,6 +423,10 @@ public boolean isView() { return viewDesc != null && !viewDesc.isMaterialized(); } + public boolean isMultiDestQuery() { + return qbp != null && qbp.getClauseNamesForDest() != null && qbp.getClauseNamesForDest().size() > 1; + } + public HashMap getViewToTabSchema() { return viewAliasToViewSchema; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1271799907..1f6016290e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12126,8 +12126,9 @@ private static void removeASTChild(ASTNode node) { } void 
analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException { - // 1. Generate Resolved Parse tree from syntax tree LOG.info("Starting Semantic Analysis"); + // 1. Generate Resolved Parse tree from syntax tree + boolean needsTransform = needsTransform(); //change the location of position alias process here processPositionAlias(ast); PlannerContext plannerCtx = pcf.create(); @@ -12147,7 +12148,6 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce // Otherwise we have to wait until after the masking/filtering step. boolean isCacheEnabled = isResultsCacheEnabled(); QueryResultsCache.LookupInfo lookupInfo = null; - boolean needsTransform = needsTransform(); if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) { lookupInfo = createLookupInfoForQuery(ast); if (checkResultsCache(lookupInfo)) { @@ -12155,25 +12155,37 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce } } + ASTNode finalAST; + if (isCBOExecuted() && needsTransform && + (qb.isCTAS() || qb.isView() || qb.isMaterializedView() || qb.isMultiDestQuery())) { + // If we use CBO and we may apply masking/filtering policies, we create a copy of the ast. + // The reason is that the generation of the operator tree may modify the initial ast, + // but if we need to parse for a second time, we would like to parse the unmodified ast. + finalAST = (ASTNode) ParseDriver.adaptor.dupTree(ast); + } else { + finalAST = ast; + } + // 2. 
Gen OP Tree from resolved Parse Tree Operator sinkOp = genOPTree(ast, plannerCtx); if (!unparseTranslator.isEnabled() && (tableMask.isEnabled() && analyzeRewrite == null)) { // Here we rewrite the * and also the masking table - ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(), + ASTNode rewrittenAST = rewriteASTWithMaskAndFilter(tableMask, finalAST, ctx.getTokenRewriteStream(), ctx, db, tabNameToTabObject, ignoredTokens); - if (tree != ast) { + if (finalAST != rewrittenAST) { + finalAST = rewrittenAST; plannerCtx = pcf.create(); ctx.setSkipTableMasking(true); init(true); //change the location of position alias process here - processPositionAlias(tree); - genResolvedParseTree(tree, plannerCtx); + processPositionAlias(finalAST); + genResolvedParseTree(finalAST, plannerCtx); if (this instanceof CalcitePlanner) { ((CalcitePlanner) this).resetCalciteConfiguration(); } - sinkOp = genOPTree(tree, plannerCtx); + sinkOp = genOPTree(finalAST, plannerCtx); } } @@ -12181,7 +12193,7 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce // In the case that row or column masking/filtering was required, the cache must be checked // here, after applying the masking/filtering rewrite rules to the AST. if (isCacheEnabled && needsTransform && queryTypeCanUseCache()) { - lookupInfo = createLookupInfoForQuery(ast); + lookupInfo = createLookupInfoForQuery(finalAST); if (checkResultsCache(lookupInfo)) { return; } diff --git a/ql/src/test/queries/clientpositive/masking_13.q b/ql/src/test/queries/clientpositive/masking_13.q new file mode 100644 index 0000000000..bb050b5d01 --- /dev/null +++ b/ql/src/test/queries/clientpositive/masking_13.q @@ -0,0 +1,28 @@ +--! qt:dataset:srcpart +--! 
qt:dataset:src +set hive.mapred.mode=nonstrict; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; + +create table masking_test as select cast(key as int) as key, value from src; + +explain select * from masking_test; +select * from masking_test; + +create table new_masking_test_nx as +select * from masking_test; +select * from new_masking_test_nx; + +create view `masking_test_view` as select key from `masking_test`; + +explain +select key from `masking_test_view`; +select key from `masking_test_view`; + +create table `my_table_masked` (key int); +insert into `my_table_masked` select key from `masking_test_view`; +select * from `my_table_masked`; + +create table new_masking_test_nx_2 as +select * from masking_test_view; + +select * from new_masking_test_nx_2; diff --git a/ql/src/test/results/clientpositive/masking_13.q.out b/ql/src/test/results/clientpositive/masking_13.q.out new file mode 100644 index 0000000000..ee4f6d90ba --- /dev/null +++ b/ql/src/test/results/clientpositive/masking_13.q.out @@ -0,0 +1,208 @@ +PREHOOK: query: create table masking_test as select cast(key as int) as key, value from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test +POSTHOOK: query: create table masking_test as select cast(key as int) as key, value from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test +POSTHOOK: Lineage: masking_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select * from masking_test +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from masking_test +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 
is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test + filterExpr: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), reverse(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from masking_test +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +#### A masked pattern was here #### +POSTHOOK: query: select * from masking_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +#### A masked pattern was here #### +0 0_lav +4 4_lav +8 8_lav +0 0_lav +0 0_lav +2 2_lav +PREHOOK: query: create table new_masking_test_nx as +select * from masking_test +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@masking_test +PREHOOK: Output: database:default +PREHOOK: Output: default@new_masking_test_nx +POSTHOOK: query: create table new_masking_test_nx as +select * from masking_test +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@masking_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_masking_test_nx +POSTHOOK: Lineage: new_masking_test_nx.key SIMPLE 
[(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: new_masking_test_nx.value EXPRESSION [(masking_test)masking_test.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from new_masking_test_nx +PREHOOK: type: QUERY +PREHOOK: Input: default@new_masking_test_nx +#### A masked pattern was here #### +POSTHOOK: query: select * from new_masking_test_nx +POSTHOOK: type: QUERY +POSTHOOK: Input: default@new_masking_test_nx +#### A masked pattern was here #### +0 0_lav +4 4_lav +8 8_lav +0 0_lav +0 0_lav +2 2_lav +PREHOOK: query: create view `masking_test_view` as select key from `masking_test` +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@masking_test +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test_view +POSTHOOK: query: create view `masking_test_view` as select key from `masking_test` +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@masking_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test_view +POSTHOOK: Lineage: masking_test_view.key SIMPLE [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: explain +select key from `masking_test_view` +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from `masking_test_view` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test + filterExpr: (((key % 2) = 0) and (key < 10) and (key > 6)) (type: boolean) + properties: + insideView TRUE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10) and (key > 6)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger((UDFToDouble(key) / 2.0D)) (type: int) + outputColumnNames: 
_col0 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from `masking_test_view` +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@masking_test_view +#### A masked pattern was here #### +POSTHOOK: query: select key from `masking_test_view` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@masking_test_view +#### A masked pattern was here #### +4 +PREHOOK: query: create table `my_table_masked` (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@my_table_masked +POSTHOOK: query: create table `my_table_masked` (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@my_table_masked +PREHOOK: query: insert into `my_table_masked` select key from `masking_test_view` +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@masking_test_view +PREHOOK: Output: default@my_table_masked +POSTHOOK: query: insert into `my_table_masked` select key from `masking_test_view` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@masking_test_view +POSTHOOK: Output: default@my_table_masked +POSTHOOK: Lineage: my_table_masked.key EXPRESSION [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from `my_table_masked` +PREHOOK: type: QUERY +PREHOOK: Input: default@my_table_masked +#### A masked pattern was here #### 
+POSTHOOK: query: select * from `my_table_masked` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@my_table_masked +#### A masked pattern was here #### +4 +PREHOOK: query: create table new_masking_test_nx_2 as +select * from masking_test_view +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@masking_test_view +PREHOOK: Output: database:default +PREHOOK: Output: default@new_masking_test_nx_2 +POSTHOOK: query: create table new_masking_test_nx_2 as +select * from masking_test_view +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@masking_test_view +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_masking_test_nx_2 +POSTHOOK: Lineage: new_masking_test_nx_2.key EXPRESSION [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from new_masking_test_nx_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@new_masking_test_nx_2 +#### A masked pattern was here #### +POSTHOOK: query: select * from new_masking_test_nx_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@new_masking_test_nx_2 +#### A masked pattern was here #### +4