diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java index bda3c21c7e..895c2f2ebc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java @@ -152,6 +152,17 @@ public Object dupNode(Object t) { return create(((CommonTree)t).token); }; + @Override + public Object dupTree(Object t, Object parent) { + // Overridden to copy start index / end index, which are needed through optimization, + // e.g., for masking/filtering. NOTE(review): assumes t is non-null - confirm with callers + ASTNode astNode = (ASTNode) t; + ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent); + astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex()); + astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex()); + return astNodeCopy; + } + @Override public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) { return new ASTErrorNode(input, start, stop, e); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1271799907..117c5bed36 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12126,12 +12126,22 @@ private static void removeASTChild(ASTNode node) { } void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException { - // 1. Generate Resolved Parse tree from syntax tree LOG.info("Starting Semantic Analysis"); + // 1. Generate Resolved Parse tree from syntax tree + boolean needsTransform = needsTransform(); + ASTNode astForInitialTraversal; + if (needsTransform) { + // If we may apply masking/filtering policies, we create a copy of the ast. + // The reason is that the first pass may modify the initial ast, but if we need to + // parse for a second time, we would like to parse the unmodified ast. 
+ astForInitialTraversal = (ASTNode) ParseDriver.adaptor.dupTree(ast); + } else { + astForInitialTraversal = ast; + } //change the location of position alias process here - processPositionAlias(ast); + processPositionAlias(astForInitialTraversal); PlannerContext plannerCtx = pcf.create(); - if (!genResolvedParseTree(ast, plannerCtx)) { + if (!genResolvedParseTree(astForInitialTraversal, plannerCtx)) { return; } @@ -12147,33 +12157,33 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticExce // Otherwise we have to wait until after the masking/filtering step. boolean isCacheEnabled = isResultsCacheEnabled(); QueryResultsCache.LookupInfo lookupInfo = null; - boolean needsTransform = needsTransform(); if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) { - lookupInfo = createLookupInfoForQuery(ast); + lookupInfo = createLookupInfoForQuery(astForInitialTraversal); if (checkResultsCache(lookupInfo)) { return; } } // 2. Gen OP Tree from resolved Parse Tree - Operator sinkOp = genOPTree(ast, plannerCtx); + Operator sinkOp = genOPTree(astForInitialTraversal, plannerCtx); if (!unparseTranslator.isEnabled() && (tableMask.isEnabled() && analyzeRewrite == null)) { // Here we rewrite the * and also the masking table - ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(), + ASTNode rewrittenAST = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(), ctx, db, tabNameToTabObject, ignoredTokens); - if (tree != ast) { + if (ast != rewrittenAST) { + ast = rewrittenAST; plannerCtx = pcf.create(); ctx.setSkipTableMasking(true); init(true); //change the location of position alias process here - processPositionAlias(tree); - genResolvedParseTree(tree, plannerCtx); + processPositionAlias(ast); + genResolvedParseTree(ast, plannerCtx); if (this instanceof CalcitePlanner) { ((CalcitePlanner) this).resetCalciteConfiguration(); } - sinkOp = genOPTree(tree, plannerCtx); + sinkOp = genOPTree(ast, 
plannerCtx); } } diff --git a/ql/src/test/queries/clientpositive/masking_13.q b/ql/src/test/queries/clientpositive/masking_13.q new file mode 100644 index 0000000000..bb050b5d01 --- /dev/null +++ b/ql/src/test/queries/clientpositive/masking_13.q @@ -0,0 +1,28 @@ +--! qt:dataset:srcpart +--! qt:dataset:src +set hive.mapred.mode=nonstrict; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; + +create table masking_test as select cast(key as int) as key, value from src; + +explain select * from masking_test; +select * from masking_test; + +create table new_masking_test_nx as +select * from masking_test; +select * from new_masking_test_nx; + +create view `masking_test_view` as select key from `masking_test`; + +explain +select key from `masking_test_view`; +select key from `masking_test_view`; + +create table `my_table_masked` (key int); +insert into `my_table_masked` select key from `masking_test_view`; +select * from `my_table_masked`; + +create table new_masking_test_nx_2 as +select * from masking_test_view; + +select * from new_masking_test_nx_2; diff --git a/ql/src/test/results/clientpositive/masking_13.q.out b/ql/src/test/results/clientpositive/masking_13.q.out new file mode 100644 index 0000000000..ee4f6d90ba --- /dev/null +++ b/ql/src/test/results/clientpositive/masking_13.q.out @@ -0,0 +1,208 @@ +PREHOOK: query: create table masking_test as select cast(key as int) as key, value from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test +POSTHOOK: query: create table masking_test as select cast(key as int) as key, value from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test +POSTHOOK: Lineage: masking_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select * from masking_test +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from masking_test +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test + filterExpr: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), reverse(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from masking_test +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +#### A masked pattern was here #### +POSTHOOK: query: select * from masking_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +#### A masked pattern was here #### +0 0_lav +4 4_lav +8 8_lav +0 0_lav +0 0_lav +2 2_lav +PREHOOK: query: create table new_masking_test_nx as +select * from masking_test +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@masking_test +PREHOOK: Output: database:default +PREHOOK: Output: default@new_masking_test_nx +POSTHOOK: query: 
create table new_masking_test_nx as +select * from masking_test +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@masking_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_masking_test_nx +POSTHOOK: Lineage: new_masking_test_nx.key SIMPLE [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: new_masking_test_nx.value EXPRESSION [(masking_test)masking_test.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from new_masking_test_nx +PREHOOK: type: QUERY +PREHOOK: Input: default@new_masking_test_nx +#### A masked pattern was here #### +POSTHOOK: query: select * from new_masking_test_nx +POSTHOOK: type: QUERY +POSTHOOK: Input: default@new_masking_test_nx +#### A masked pattern was here #### +0 0_lav +4 4_lav +8 8_lav +0 0_lav +0 0_lav +2 2_lav +PREHOOK: query: create view `masking_test_view` as select key from `masking_test` +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@masking_test +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test_view +POSTHOOK: query: create view `masking_test_view` as select key from `masking_test` +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@masking_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test_view +POSTHOOK: Lineage: masking_test_view.key SIMPLE [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: explain +select key from `masking_test_view` +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from `masking_test_view` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test + filterExpr: (((key % 2) = 0) and (key < 10) and (key > 6)) (type: boolean) + properties: + insideView TRUE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + 
Filter Operator + predicate: (((key % 2) = 0) and (key < 10) and (key > 6)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger((UDFToDouble(key) / 2.0D)) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from `masking_test_view` +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@masking_test_view +#### A masked pattern was here #### +POSTHOOK: query: select key from `masking_test_view` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@masking_test_view +#### A masked pattern was here #### +4 +PREHOOK: query: create table `my_table_masked` (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@my_table_masked +POSTHOOK: query: create table `my_table_masked` (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@my_table_masked +PREHOOK: query: insert into `my_table_masked` select key from `masking_test_view` +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@masking_test_view +PREHOOK: Output: default@my_table_masked +POSTHOOK: query: insert into `my_table_masked` select key from `masking_test_view` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@masking_test_view +POSTHOOK: Output: default@my_table_masked 
+POSTHOOK: Lineage: my_table_masked.key EXPRESSION [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from `my_table_masked` +PREHOOK: type: QUERY +PREHOOK: Input: default@my_table_masked +#### A masked pattern was here #### +POSTHOOK: query: select * from `my_table_masked` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@my_table_masked +#### A masked pattern was here #### +4 +PREHOOK: query: create table new_masking_test_nx_2 as +select * from masking_test_view +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@masking_test_view +PREHOOK: Output: database:default +PREHOOK: Output: default@new_masking_test_nx_2 +POSTHOOK: query: create table new_masking_test_nx_2 as +select * from masking_test_view +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@masking_test_view +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_masking_test_nx_2 +POSTHOOK: Lineage: new_masking_test_nx_2.key EXPRESSION [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from new_masking_test_nx_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@new_masking_test_nx_2 +#### A masked pattern was here #### +POSTHOOK: query: select * from new_masking_test_nx_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@new_masking_test_nx_2 +#### A masked pattern was here #### +4