diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java index 1686f36..7407b67 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java @@ -17,16 +17,14 @@ */ package org.apache.hadoop.hive.ql.parse; -import java.util.ArrayList; import java.util.List; import org.antlr.runtime.TokenRewriteStream; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer; import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; import org.apache.hadoop.hive.ql.session.SessionState; import org.slf4j.Logger; @@ -46,10 +44,12 @@ private boolean enable; private boolean needsRewrite; private HiveAuthzContext queryContext; + private HiveConf conf; public TableMask(SemanticAnalyzer analyzer, HiveConf conf) throws SemanticException { try { authorizer = SessionState.get().getAuthorizerV2(); + this.conf = conf; String cmdString = analyzer.ctx.getCmd(); SessionState ss = SessionState.get(); HiveAuthzContext.Builder ctxBuilder = new HiveAuthzContext.Builder(); @@ -83,6 +83,9 @@ public boolean needTransform() throws SemanticException { public String create(HivePrivilegeObject privObject, MaskAndFilterInfo maskAndFilterInfo) throws SemanticException { + boolean doColumnMasking = false; + boolean doRowFiltering = false; + boolean containsAutogenColumnAlias = false; StringBuilder sb = new StringBuilder(); sb.append("(SELECT "); boolean firstOne = true; @@ -105,33 +108,42 @@ public String create(HivePrivilegeObject privObject, MaskAndFilterInfo maskAndFi firstOne = false; } String colName = privObject.getColumns().get(index); + if (colName.startsWith(HiveConf.getVar(conf, + HiveConf.ConfVars.HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL))) { + containsAutogenColumnAlias = true; + } if (!expr.equals(colName)) { // CAST(expr AS COLTYPE) AS COLNAME sb.append("CAST(" + expr + " AS " + colTypes.get(index) + ") AS `" + colName + "`"); + doColumnMasking = true; } else { - sb.append(expr); - } - } - } else { - for (int index = 0; index < privObject.getColumns().size(); index++) { - String expr = privObject.getColumns().get(index); - if (!firstOne) { - sb.append(", "); - } else { - firstOne = false; + sb.append(colName); } - sb.append(expr); } + } + if (!doColumnMasking) { + sb = new StringBuilder(); + sb.append("(SELECT *"); } sb.append(" FROM `" + privObject.getDbname() + "`.`" + privObject.getObjectName() + "`"); sb.append(" " + maskAndFilterInfo.additionalTabInfo); String filter = privObject.getRowFilterExpression(); if (filter != null) { + doRowFiltering = true; sb.append(" WHERE " + filter); } - sb.append(")" + maskAndFilterInfo.alias); - LOG.debug("TableMask creates `" + sb.toString() + "`"); - return sb.toString(); + sb.append(") " + maskAndFilterInfo.alias); + + if (!doColumnMasking && !doRowFiltering) { + // nothing to do + return null; + } else if (doColumnMasking && containsAutogenColumnAlias) { + throw new SemanticException( + "Hive column masking do not support autogen column alias. Please (1) specify the column alias for all autogen columns explicitly; or (2) turn off the security module."); + } else { + LOG.debug("TableMask creates `" + sb.toString() + "`"); + return sb.toString(); + } } void addTableMasking(ASTNode node, String replacementText) throws SemanticException { diff --git a/ql/src/test/queries/clientnegative/masking_6.q b/ql/src/test/queries/clientnegative/masking_6.q new file mode 100644 index 0000000..8f4ee44 --- /dev/null +++ b/ql/src/test/queries/clientnegative/masking_6.q @@ -0,0 +1,13 @@ +set hive.mapred.mode=nonstrict; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; + +create view masking_test as select cast(key as int) as key, value, '12' from src; + +explain select * from masking_test; + +select * from masking_test; + +explain select * from masking_test where key > 0; + +select * from masking_test where key > 0; + diff --git a/ql/src/test/queries/clientpositive/masking_6.q b/ql/src/test/queries/clientpositive/masking_6.q new file mode 100644 index 0000000..8477d2a --- /dev/null +++ b/ql/src/test/queries/clientpositive/masking_6.q @@ -0,0 +1,26 @@ +set hive.mapred.mode=nonstrict; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; + +drop view masking_test; + +create view masking_test as select cast(key as int) as key, '12' from src; + +explain select * from masking_test; + +select * from masking_test; + +explain select * from masking_test where key > 0; + +select * from masking_test where key > 0; + +drop view masking_test; + +create view masking_test as select cast(key as int) as key, value, '12' as col from src; + +explain select * from masking_test; + +select * from masking_test; + +explain select * from masking_test where key > 0; + +select * from masking_test where key > 0; diff --git a/ql/src/test/results/clientnegative/masking_6.q.out b/ql/src/test/results/clientnegative/masking_6.q.out new file mode 100644 index 0000000..57d6d84 --- /dev/null +++ b/ql/src/test/results/clientnegative/masking_6.q.out @@ -0,0 +1,11 @@ +PREHOOK: query: create view masking_test as select cast(key as int) as key, value, '12' from src +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test +POSTHOOK: query: create view masking_test as select cast(key as int) as key, value, '12' from src +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test +FAILED: SemanticException Hive column masking do not support autogen column alias. Please (1) specify the column alias for all autogen columns explicitly; or (2) turn off the security module. diff --git a/ql/src/test/results/clientpositive/masking_6.q.out b/ql/src/test/results/clientpositive/masking_6.q.out new file mode 100644 index 0000000..34cf0f1 --- /dev/null +++ b/ql/src/test/results/clientpositive/masking_6.q.out @@ -0,0 +1,234 @@ +PREHOOK: query: drop view masking_test +PREHOOK: type: DROPVIEW +POSTHOOK: query: drop view masking_test +POSTHOOK: type: DROPVIEW +PREHOOK: query: create view masking_test as select cast(key as int) as key, '12' from src +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test +POSTHOOK: query: create view masking_test as select cast(key as int) as key, '12' from src +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test +PREHOOK: query: explain select * from masking_test +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from masking_test +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) (type: boolean) + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), '12' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 9 Data size: 901 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from masking_test +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from masking_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 12 +4 12 +8 12 +0 12 +0 12 +2 12 +PREHOOK: query: explain select * from masking_test where key > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from masking_test where key > 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and (UDFToInteger(key) > 0)) (type: boolean) + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), '12' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 300 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from masking_test where key > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from masking_test where key > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 12 +8 12 +2 12 +PREHOOK: query: drop view masking_test +PREHOOK: type: DROPVIEW +PREHOOK: Input: default@masking_test +PREHOOK: Output: default@masking_test +POSTHOOK: query: drop view masking_test +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: default@masking_test +POSTHOOK: Output: default@masking_test +PREHOOK: query: create view masking_test as select cast(key as int) as key, value, '12' as col from src +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test +POSTHOOK: query: create view masking_test as select cast(key as int) as key, value, '12' as col from src +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test +PREHOOK: query: explain select * from masking_test +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from masking_test +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), reverse(value) (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from masking_test +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from masking_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0_lav 12 +4 4_lav 12 +8 8_lav 12 +0 0_lav 12 +0 0_lav 12 +2 2_lav 12 +PREHOOK: query: explain select * from masking_test where key > 0 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from masking_test where key > 0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10) and (UDFToInteger(key) > 0)) (type: boolean) + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), reverse(value) (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from masking_test where key > 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from masking_test where key > 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@src +#### A masked pattern was here #### +4 4_lav 12 +8 8_lav 12 +2 2_lav 12