diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java
index 4003274..383fa8c 100644
--- itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java
+++ itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java
@@ -131,6 +131,18 @@ public boolean needTransform() {
         }
         privObj.setCellValueTransformers(cellValueTransformers);
         needRewritePrivObjs.add(privObj);
+      } else if (privObj.getObjectName().equals("masking_test_view")) {
+        privObj.setRowFilterExpression("key > 6");
+        List<String> cellValueTransformers = new ArrayList<>();
+        for (String columnName : privObj.getColumns()) {
+          if (columnName.equals("key")) {
+            cellValueTransformers.add("key / 2");
+          } else {
+            cellValueTransformers.add(columnName);
+          }
+        }
+        privObj.setCellValueTransformers(cellValueTransformers);
+        needRewritePrivObjs.add(privObj);
       } else if (privObj.getObjectName().equals("masking_test_subq")) {
         privObj
             .setRowFilterExpression("key in (select key from src where src.key = masking_test_subq.key)");
diff --git ql/src/java/org/apache/hadoop/hive/ql/Context.java ql/src/java/org/apache/hadoop/hive/ql/Context.java
index cedb486..2753f1f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -102,6 +102,10 @@
   // number of previous attempts
   protected int tryCount = 0;
   private TokenRewriteStream tokenRewriteStream;
+  // Holds the qualified name to tokenRewriteStream for the views
+  // referenced by the query. This is used to rewrite the view AST
+  // with column masking and row filtering policies.
+  private final Map<String, TokenRewriteStream> viewsTokenRewriteStreams;

   private final String executionId;

   // Some statements, e.g., UPDATE, DELETE, or MERGE, get rewritten into different
@@ -282,6 +286,8 @@ private Context(Configuration conf, String executionId) {
     scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION);
     stagingDir = HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR);
     opContext = new CompilationOpContext();
+
+    viewsTokenRewriteStreams = new HashMap<>();
   }

   public Map<String, Path> getFsScratchDirs() {
@@ -807,6 +813,15 @@ public TokenRewriteStream getTokenRewriteStream() {
     return tokenRewriteStream;
   }

+  public void addViewTokenRewriteStream(String viewFullyQualifiedName,
+      TokenRewriteStream tokenRewriteStream) {
+    viewsTokenRewriteStreams.put(viewFullyQualifiedName, tokenRewriteStream);
+  }
+
+  public TokenRewriteStream getViewTokenRewriteStream(String viewFullyQualifiedName) {
+    return viewsTokenRewriteStreams.get(viewFullyQualifiedName);
+  }
+
   /**
    * Generate a unique executionId. An executionId, together with user name and
    * the configuration, will determine the temporary locations of all intermediate
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
index f9ad07f..d9a16a2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
@@ -163,7 +163,7 @@ public ASTNode parse(String command) throws ParseException {

   public ASTNode parse(String command, Context ctx)
       throws ParseException {
-    return parse(command, ctx, true);
+    return parse(command, ctx, null);
   }

   /**
@@ -180,7 +180,7 @@ public ASTNode parse(String command, Context ctx)
    *
    * @return parsed AST
    */
-  public ASTNode parse(String command, Context ctx, boolean setTokenRewriteStream)
+  public ASTNode parse(String command, Context ctx, String viewFullyQualifiedName)
       throws ParseException {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Parsing command: " + command);
@@ -189,8 +189,12 @@ public ASTNode parse(String command, Context ctx, boolean setTokenRewriteStream)
     HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
     TokenRewriteStream tokens = new TokenRewriteStream(lexer);
     if (ctx != null) {
-      if ( setTokenRewriteStream) {
+      if (viewFullyQualifiedName == null) {
+        // Top level query
         ctx.setTokenRewriteStream(tokens);
+      } else {
+        // It is a view
+        ctx.addViewTokenRewriteStream(viewFullyQualifiedName, tokens);
       }
       lexer.setHiveConf(ctx.getConf());
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index 51aeeed..c9c2e1d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -67,14 +67,14 @@ public static ASTNode parse(String command) throws ParseException {

   /** Parses the Hive query. */
   public static ASTNode parse(String command, Context ctx) throws ParseException {
-    return parse(command, ctx, true);
+    return parse(command, ctx, null);
   }

   /** Parses the Hive query. */
   public static ASTNode parse(
-      String command, Context ctx, boolean setTokenRewriteStream) throws ParseException {
+      String command, Context ctx, String viewFullyQualifiedName) throws ParseException {
     ParseDriver pd = new ParseDriver();
-    ASTNode tree = pd.parse(command, ctx, setTokenRewriteStream);
+    ASTNode tree = pd.parse(command, ctx, viewFullyQualifiedName);
     tree = findRootNonNullToken(tree);
     handleSetColRefs(tree);
     return tree;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 2fbdd70..5e45142 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -46,6 +46,7 @@
 import org.antlr.runtime.ClassicToken;
 import org.antlr.runtime.CommonToken;
 import org.antlr.runtime.Token;
+import org.antlr.runtime.TokenRewriteStream;
 import org.antlr.runtime.tree.Tree;
 import org.antlr.runtime.tree.TreeVisitor;
 import org.antlr.runtime.tree.TreeVisitorAction;
@@ -2457,11 +2458,18 @@ private void replaceViewReferenceWithDefinition(QB qb, Table tab,
         tab.getViewExpandedText(), alias, qb.getParseInfo().getSrcForAlias(
             alias));
     try {
-      String viewText = tab.getViewExpandedText();
       // Reparse text, passing null for context to avoid clobbering
       // the top-level token stream.
-      ASTNode tree = ParseUtils.parse(viewText, ctx, false);
-      viewTree = tree;
+      String viewFullyQualifiedName = tab.getCompleteName();
+      String viewText = tab.getViewExpandedText();
+      TableMask viewMask = new TableMask(this, conf, false);
+      viewTree = ParseUtils.parse(viewText, ctx, tab.getCompleteName());
+      if (!unparseTranslator.isEnabled() &&
+          (viewMask.isEnabled() && analyzeRewrite == null)) {
+        viewTree = rewriteASTWithMaskAndFilter(viewMask, viewTree,
+            ctx.getViewTokenRewriteStream(viewFullyQualifiedName),
+            ctx, db, tabNameToTabObject, ignoredTokens);
+      }
       Dispatcher nodeOriginDispatcher = new Dispatcher() {
         @Override
         public Object dispatch(Node nd, java.util.Stack<Node> stack,
@@ -11027,7 +11035,8 @@ private Table getTableObjectByName(String tableName) throws HiveException {
     return getTableObjectByName(tableName, true);
   }

-  private void walkASTMarkTABREF(ASTNode ast, Set<String> cteAlias)
+  private static void walkASTMarkTABREF(TableMask tableMask, ASTNode ast, Set<String> cteAlias,
+      Context ctx, Hive db, Map<String, Table> tabNameToTabObject, Set<Integer> ignoredTokens)
       throws SemanticException {
     Queue<Node> queue = new LinkedList<>();
     queue.add(ast);
@@ -11072,10 +11081,15 @@ private void walkASTMarkTABREF(ASTNode ast, Set<String> cteAlias)
         String replacementText = null;
         Table table = null;
         try {
-          table = getTableObjectByName(tabIdName);
+          if (!tabNameToTabObject.containsKey(tabIdName)) {
+            table = db.getTable(tabIdName, true);
+            tabNameToTabObject.put(tabIdName, table);
+          } else {
+            table = tabNameToTabObject.get(tabIdName);
+          }
         } catch (HiveException e) {
           // Table may not be found when materialization of CTE is on.
-          LOG.info("Table " + tabIdName + " is not found in walkASTMarkTABREF.");
+          STATIC_LOG.debug("Table " + tabIdName + " is not found in walkASTMarkTABREF.");
           continue;
         }

@@ -11121,7 +11135,9 @@ private void walkASTMarkTABREF(ASTNode ast, Set<String> cteAlias)
   // the table needs to be masked or filtered.
   // For the replacement, we leverage the methods that are used for
   // unparseTranslator.
-  public ASTNode rewriteASTWithMaskAndFilter(ASTNode ast) throws SemanticException {
+  protected static ASTNode rewriteASTWithMaskAndFilter(TableMask tableMask, ASTNode ast, TokenRewriteStream tokenRewriteStream,
+      Context ctx, Hive db, Map<String, Table> tabNameToTabObject, Set<Integer> ignoredTokens)
+      throws SemanticException {
     // 1. collect information about CTE if there is any.
     // The base table of CTE should be masked.
     // The CTE itself should not be masked in the references in the following main query.
@@ -11144,23 +11160,26 @@ public ASTNode rewriteASTWithMaskAndFilter(ASTNode ast) throws SemanticException
           throw new SemanticException("Duplicate definition of " + alias);
         } else {
           cteAlias.add(alias);
-          walkASTMarkTABREF(subq, cteAlias);
+          walkASTMarkTABREF(tableMask, subq, cteAlias,
+              ctx, db, tabNameToTabObject, ignoredTokens);
         }
       }
       // walk the other part of ast
       for (int index = 1; index < ast.getChildCount(); index++) {
-        walkASTMarkTABREF((ASTNode) ast.getChild(index), cteAlias);
+        walkASTMarkTABREF(tableMask, (ASTNode) ast.getChild(index), cteAlias,
+            ctx, db, tabNameToTabObject, ignoredTokens);
       }
     }
     // there is no CTE, walk the whole AST
     else {
-      walkASTMarkTABREF(ast, cteAlias);
+      walkASTMarkTABREF(tableMask, ast, cteAlias,
+          ctx, db, tabNameToTabObject, ignoredTokens);
     }
     // 2. rewrite the AST, replace TABREF with masking/filtering
     if (tableMask.needsRewrite()) {
-      tableMask.applyTranslations(ctx.getTokenRewriteStream());
-      String rewrittenQuery = ctx.getTokenRewriteStream().toString(ast.getTokenStartIndex(),
-          ast.getTokenStopIndex());
+      tableMask.applyTranslations(tokenRewriteStream);
+      String rewrittenQuery = tokenRewriteStream.toString(
+          ast.getTokenStartIndex(), ast.getTokenStopIndex());
       ASTNode rewrittenTree;
       // Parse the rewritten query string
       // check if we need to ctx.setCmd(rewrittenQuery);
@@ -11235,7 +11254,7 @@ else if(ast.getChild(0).getType() == HiveParser.TOK_FALSE) {

     // masking and filtering should be created here
     // the basic idea is similar to unparseTranslator.
-    tableMask = new TableMask(this, conf, ctx);
+    tableMask = new TableMask(this, conf, ctx.isSkipTableMasking());

     // 4. continue analyzing from the child ASTNode.
     Phase1Ctx ctx_1 = initPhase1Ctx();
@@ -11396,7 +11415,8 @@ void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException {
     if (!unparseTranslator.isEnabled() &&
         (tableMask.isEnabled() && analyzeRewrite == null)) {
       // Here we rewrite the * and also the masking table
-      ASTNode tree = rewriteASTWithMaskAndFilter(ast);
+      ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(),
+          ctx, db, tabNameToTabObject, ignoredTokens);
       if (tree != ast) {
         plannerCtx = pcf.create();
         ctx.setSkipTableMasking(true);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
index e34351c..4e57b1b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
@@ -22,7 +22,6 @@

 import org.antlr.runtime.TokenRewriteStream;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer;
@@ -48,7 +47,8 @@
   private HiveAuthzContext queryContext;
   private HiveConf conf;

-  public TableMask(SemanticAnalyzer analyzer, HiveConf conf, Context ctx) throws SemanticException {
+  public TableMask(SemanticAnalyzer analyzer, HiveConf conf, boolean skipTableMasking)
+      throws SemanticException {
     try {
       authorizer = SessionState.get().getAuthorizerV2();
       this.conf = conf;
@@ -59,7 +59,7 @@ public TableMask(SemanticAnalyzer analyzer, HiveConf conf, Context ctx) throws SemanticException {
       ctxBuilder.setUserIpAddress(ss.getUserIpAddress());
       ctxBuilder.setForwardedAddresses(ss.getForwardedAddresses());
       queryContext = ctxBuilder.build();
-      if (authorizer != null && needTransform() && !ctx.isSkipTableMasking()) {
+      if (authorizer != null && needTransform() && !skipTableMasking) {
         enable = true;
         translator = new UnparseTranslator(conf);
         translator.enable();
diff --git ql/src/test/queries/clientpositive/masking_12.q ql/src/test/queries/clientpositive/masking_12.q
new file mode 100644
index 0000000..cf12da9
--- /dev/null
+++ ql/src/test/queries/clientpositive/masking_12.q
@@ -0,0 +1,34 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table `masking_test` as select cast(key as int) as key, value from src;
+
+create view `v0` as select * from `masking_test`;
+
+explain
+select * from `v0`;
+
+select * from `v0`;
+
+create table `masking_test_subq` as select cast(key as int) as key, value from src;
+
+create view `v1` as select * from `masking_test_subq`;
+
+explain
+select * from `v1`
+limit 20;
+
+select * from `v1`
+limit 20;
+
+create view `masking_test_view` as select key from `v0`;
+
+explain
+select key from `masking_test_view`;
+
+select key from `masking_test_view`;
+
+explain
+select `v0`.value from `v0` join `masking_test_view` on `v0`.key = `masking_test_view`.key;
+
+select `v0`.value from `v0` join `masking_test_view` on `v0`.key = `masking_test_view`.key;
diff --git ql/src/test/results/clientpositive/masking_12.q.out ql/src/test/results/clientpositive/masking_12.q.out
new file mode 100644
index 0000000..540c53e
--- /dev/null
+++ ql/src/test/results/clientpositive/masking_12.q.out
@@ -0,0 +1,437 @@
+PREHOOK: query: create table `masking_test` as select cast(key as int) as key, value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create table `masking_test` as select cast(key as int) as key, value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+POSTHOOK: Lineage: masking_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create view `v0` as select * from `masking_test`
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@masking_test
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v0
+POSTHOOK: query: create view `v0` as select * from `masking_test`
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v0
+POSTHOOK: Lineage: v0.key SIMPLE [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: v0.value SIMPLE [(masking_test)masking_test.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: explain
+select * from `v0`
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from `v0`
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: masking_test
+            properties:
+              insideView TRUE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from `v0`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@v0
+#### A masked pattern was here ####
+POSTHOOK: query: select * from `v0`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@v0
+#### A masked pattern was here ####
+0	0_lav
+4	4_lav
+8	8_lav
+0	0_lav
+0	0_lav
+2	2_lav
+PREHOOK: query: create table `masking_test_subq` as select cast(key as int) as key, value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test_subq
+POSTHOOK: query: create table `masking_test_subq` as select cast(key as int) as key, value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test_subq
+POSTHOOK: Lineage: masking_test_subq.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: masking_test_subq.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: create view `v1` as select * from `masking_test_subq`
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@masking_test_subq
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v1
+POSTHOOK: query: create view `v1` as select * from `masking_test_subq`
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@masking_test_subq
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v1
+POSTHOOK: Lineage: v1.key SIMPLE [(masking_test_subq)masking_test_subq.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: v1.value SIMPLE [(masking_test_subq)masking_test_subq.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: explain
+select * from `v1`
+limit 20
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from `v1`
+limit 20
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-2 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: masking_test_subq
+            properties:
+              insideView TRUE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int)
+              outputColumnNames: key
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                keys: key (type: int)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: UDFToDouble(_col0) (type: double)
+                sort order: +
+                Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: UDFToDouble(_col0) (type: double)
+              sort order: +
+              Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 UDFToDouble(_col0) (type: double)
+            1 UDFToDouble(_col0) (type: double)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            keys: _col0 (type: string), _col1 (type: int)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: masking_test_subq
+            properties:
+              insideView TRUE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int)
+                sort order: ++
+                Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col1 (type: string)
+          TableScan
+            Reduce Output Operator
+              key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int)
+              sort order: ++
+              Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col1 (type: int)
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          keys:
+            0 UDFToDouble(_col0) (type: double), _col0 (type: int)
+            1 UDFToDouble(_col0) (type: double), _col1 (type: int)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 20
+            Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 20
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from `v1`
+limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test_subq
+PREHOOK: Input: default@src
+PREHOOK: Input: default@v1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from `v1`
+limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test_subq
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@v1
+#### A masked pattern was here ####
+0	val_0
+0	val_0
+0	val_0
+2	val_2
+4	val_4
+5	val_5
+5	val_5
+5	val_5
+8	val_8
+9	val_9
+10	val_10
+11	val_11
+12	val_12
+12	val_12
+15	val_15
+15	val_15
+17	val_17
+18	val_18
+18	val_18
+19	val_19
+PREHOOK: query: create view `masking_test_view` as select key from `v0`
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@v0
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test_view
+POSTHOOK: query: create view `masking_test_view` as select key from `v0`
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@v0
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test_view
+POSTHOOK: Lineage: masking_test_view.key SIMPLE [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: explain
+select key from `masking_test_view`
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key from `masking_test_view`
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: masking_test
+            properties:
+              insideView TRUE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((key % 2) = 0) and (key < 10) and (key > 6)) (type: boolean)
+              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger((UDFToDouble(key) / 2.0)) (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key from `masking_test_view`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@masking_test_view
+PREHOOK: Input: default@v0
+#### A masked pattern was here ####
+POSTHOOK: query: select key from `masking_test_view`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@masking_test_view
+POSTHOOK: Input: default@v0
+#### A masked pattern was here ####
+4
+PREHOOK: query: explain
+select `v0`.value from `v0` join `masking_test_view` on `v0`.key = `masking_test_view`.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select `v0`.value from `v0` join `masking_test_view` on `v0`.key = `masking_test_view`.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: masking_test
+            properties:
+              insideView TRUE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: string)
+          TableScan
+            alias: masking_test
+            properties:
+              insideView TRUE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((UDFToInteger((UDFToDouble(key) / 2.0)) % 2) = 0) and ((key % 2) = 0) and (UDFToInteger((UDFToDouble(key) / 2.0)) < 10) and (key < 10) and (key > 6)) (type: boolean)
+              Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger((UDFToDouble(key) / 2.0)) (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 4 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: int)
+            1 _col0 (type: int)
+          outputColumnNames: _col1
+          Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: string)
+            outputColumnNames: _col0
+            Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select `v0`.value from `v0` join `masking_test_view` on `v0`.key = `masking_test_view`.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@masking_test_view
+PREHOOK: Input: default@v0
+#### A masked pattern was here ####
+POSTHOOK: query: select `v0`.value from `v0` join `masking_test_view` on `v0`.key = `masking_test_view`.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@masking_test_view
+POSTHOOK: Input: default@v0
+#### A masked pattern was here ####
+4_lav
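
Reviewer note (not part of the patch): a minimal sketch of how the per-view token rewrite streams introduced above are meant to be consumed. The Context and ParseUtils calls mirror the API added by this patch; the class name, the view text, and the "default.v0" name are hypothetical placeholders, and a live Context is assumed.

    import org.antlr.runtime.TokenRewriteStream;
    import org.apache.hadoop.hive.ql.Context;
    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.ParseException;
    import org.apache.hadoop.hive.ql.parse.ParseUtils;

    class ViewMaskingFlowSketch {
      static ASTNode parseTrackedView(Context ctx, String viewExpandedText,
          String viewFullyQualifiedName) throws ParseException {
        // A null third argument marks a top-level query and registers the
        // stream via ctx.setTokenRewriteStream(); a non-null view name
        // registers it via ctx.addViewTokenRewriteStream() instead, so the
        // top-level query's stream is not clobbered.
        ASTNode viewTree = ParseUtils.parse(viewExpandedText, ctx, viewFullyQualifiedName);
        // rewriteASTWithMaskAndFilter() later looks this stream up to apply
        // row-filter and column-mask translations against the view's own tokens.
        TokenRewriteStream viewStream =
            ctx.getViewTokenRewriteStream(viewFullyQualifiedName);
        assert viewStream != null;
        return viewTree;
      }
    }

    // Usage: parseTrackedView(ctx, tab.getViewExpandedText(), "default.v0")
    // follows the same flow as replaceViewReferenceWithDefinition() above.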