diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 10ad3ea..9449fce 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1596,6 +1596,8 @@ "readable text) or \"json\" (for a json object)."), HIVE_ENTITY_SEPARATOR("hive.entity.separator", "@", "Separator used to construct names of tables and partitions. For example, dbname@tablename@partitionname"), + HIVE_CAPTURE_TRANSFORM_ENTITY("hive.entity.capture.transform", false, + "Compiler to capture transform URI referred in the query"), HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY("hive.display.partition.cols.separately", true, "In older Hive version (0.10 and earlier) no distinction was made between\n" + "partition columns or non-partition columns while displaying columns in describe\n" + diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 29be691..d1de485 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -3176,6 +3176,13 @@ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); output.setColumnExprMap(new HashMap()); // disable backtracking + // Add URI entity for transform script. 
script assumed to be local unless downloadable + if (conf.getBoolVar(ConfVars.HIVE_CAPTURE_TRANSFORM_ENTITY)) { + String scriptCmd = getScriptProgName(stripQuotes(trfm.getChild(execPos).getText())); + getInputs().add(new ReadEntity(new Path(scriptCmd), + !SessionState.canDownloadResource(scriptCmd))); + } + return output; } diff --git ql/src/test/queries/clientnegative/authorization_disallow_transform.q ql/src/test/queries/clientnegative/authorization_disallow_transform.q index 1702bcf..35c0653 100644 --- ql/src/test/queries/clientnegative/authorization_disallow_transform.q +++ ql/src/test/queries/clientnegative/authorization_disallow_transform.q @@ -1,6 +1,7 @@ set hive.test.authz.sstd.hs2.mode=true; set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; set hive.security.authorization.enabled=true; +set hive.entity.capture.transform=true; set role ALL; create table t1(i int); SELECT TRANSFORM (*) USING 'cat' AS (key, value) FROM t1; diff --git ql/src/test/queries/clientpositive/ppd_transform.q ql/src/test/queries/clientpositive/ppd_transform.q index 530ef9c..5b57b8f 100644 --- ql/src/test/queries/clientpositive/ppd_transform.q +++ ql/src/test/queries/clientpositive/ppd_transform.q @@ -1,5 +1,6 @@ set hive.optimize.ppd=true; set hive.ppd.remove.duplicatefilters=false; +set hive.entity.capture.transform=true; EXPLAIN FROM ( diff --git ql/src/test/queries/clientpositive/select_transform_hint.q ql/src/test/queries/clientpositive/select_transform_hint.q index 33959ed..b5a8147 100644 --- ql/src/test/queries/clientpositive/select_transform_hint.q +++ ql/src/test/queries/clientpositive/select_transform_hint.q @@ -1,3 +1,5 @@ +set hive.entity.capture.transform=true; + EXPLAIN SELECT /*+MAPJOIN(a)*/ TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) @@ -21,4 +23,4 @@ on a.key = b.key; SELECT /*+STREAMTABLE(a)*/ TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) FROM src a join src 
b -on a.key = b.key; \ No newline at end of file +on a.key = b.key; diff --git ql/src/test/queries/clientpositive/transform1.q ql/src/test/queries/clientpositive/transform1.q index 3bed2b6..15e42e7 100644 --- ql/src/test/queries/clientpositive/transform1.q +++ ql/src/test/queries/clientpositive/transform1.q @@ -1,3 +1,4 @@ +set hive.entity.capture.transform=true; create table transform1_t1(a string, b string); diff --git ql/src/test/queries/clientpositive/transform_acid.q ql/src/test/queries/clientpositive/transform_acid.q index 4cb9e38..94782f1 100644 --- ql/src/test/queries/clientpositive/transform_acid.q +++ ql/src/test/queries/clientpositive/transform_acid.q @@ -1,3 +1,4 @@ +set hive.entity.capture.transform=true; set hive.support.concurrency=true; set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; set hive.enforce.bucketing=true; diff --git ql/src/test/queries/clientpositive/transform_ppr1.q ql/src/test/queries/clientpositive/transform_ppr1.q index 07bb4d1..424fff2 100644 --- ql/src/test/queries/clientpositive/transform_ppr1.q +++ ql/src/test/queries/clientpositive/transform_ppr1.q @@ -1,4 +1,5 @@ set hive.optimize.ppd=true; +set hive.entity.capture.transform=true; EXPLAIN EXTENDED FROM ( diff --git ql/src/test/queries/clientpositive/transform_ppr2.q ql/src/test/queries/clientpositive/transform_ppr2.q index 85ef3ac..77f4e49 100644 --- ql/src/test/queries/clientpositive/transform_ppr2.q +++ ql/src/test/queries/clientpositive/transform_ppr2.q @@ -1,4 +1,5 @@ set hive.optimize.ppd=true; +set hive.entity.capture.transform=true; EXPLAIN EXTENDED FROM ( diff --git ql/src/test/results/clientnegative/authorization_disallow_transform.q.out ql/src/test/results/clientnegative/authorization_disallow_transform.q.out index 812c1a2..75203f9 100644 --- ql/src/test/results/clientnegative/authorization_disallow_transform.q.out +++ ql/src/test/results/clientnegative/authorization_disallow_transform.q.out @@ -10,11 +10,4 @@ POSTHOOK: query: create table t1(i 
int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@t1 -PREHOOK: query: SELECT TRANSFORM (*) USING 'cat' AS (key, value) FROM t1 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -#### A masked pattern was here #### -FAILED: Hive Internal Error: org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException(Query with transform clause is disallowed in current configuration.) -org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException: Query with transform clause is disallowed in current configuration. -#### A masked pattern was here #### - +FAILED: HiveAccessControlException Permission denied: Principal [name=hive_test_user, type=USER] does not have following privileges for operation QUERY [[SELECT] on Object [type=LOCAL_URI, name=cat]] diff --git ql/src/test/results/clientpositive/ppd_transform.q.out ql/src/test/results/clientpositive/ppd_transform.q.out index 0be845f..f5312eb 100644 --- ql/src/test/results/clientpositive/ppd_transform.q.out +++ ql/src/test/results/clientpositive/ppd_transform.q.out @@ -74,6 +74,7 @@ PREHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: FROM ( @@ -84,6 +85,7 @@ POSTHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 @@ -246,6 +248,7 @@ PREHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: FROM ( @@ -256,6 +259,7 @@ POSTHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@src #### A masked pattern was here #### 0 
val_0 diff --git ql/src/test/results/clientpositive/select_transform_hint.q.out ql/src/test/results/clientpositive/select_transform_hint.q.out index e9cb3d2..43b1ada 100644 --- ql/src/test/results/clientpositive/select_transform_hint.q.out +++ ql/src/test/results/clientpositive/select_transform_hint.q.out @@ -80,6 +80,7 @@ TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) FROM src a join src b on a.key = b.key PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: SELECT /*+MAPJOIN(a)*/ @@ -87,6 +88,7 @@ TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) FROM src a join src b on a.key = b.key POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@src #### A masked pattern was here #### 238 val_238 @@ -1195,6 +1197,7 @@ TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) FROM src a join src b on a.key = b.key PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: SELECT /*+STREAMTABLE(a)*/ @@ -1202,6 +1205,7 @@ TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue) FROM src a join src b on a.key = b.key POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 diff --git ql/src/test/results/clientpositive/tez/transform1.q.out ql/src/test/results/clientpositive/tez/transform1.q.out index ffe911c..718f128 100644 --- ql/src/test/results/clientpositive/tez/transform1.q.out +++ ql/src/test/results/clientpositive/tez/transform1.q.out @@ -53,10 +53,12 @@ STAGE PLANS: PREHOOK: query: SELECT transform(*) USING 'cat' AS (col array) FROM transform1_t1 PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@transform1_t1 #### A masked pattern was here #### POSTHOOK: query: SELECT transform(*) USING 'cat' AS (col array) FROM transform1_t1 POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@transform1_t1 #### A masked pattern was 
here #### PREHOOK: query: create table transform1_t2(col array) @@ -125,10 +127,12 @@ STAGE PLANS: PREHOOK: query: SELECT transform('0\0021\0022') USING 'cat' AS (col array) FROM transform1_t2 PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@transform1_t2 #### A masked pattern was here #### POSTHOOK: query: SELECT transform('0\0021\0022') USING 'cat' AS (col array) FROM transform1_t2 POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@transform1_t2 #### A masked pattern was here #### [0,1,2] diff --git ql/src/test/results/clientpositive/tez/transform_ppr1.q.out ql/src/test/results/clientpositive/tez/transform_ppr1.q.out index 4a5d7ef..a91ee15 100644 --- ql/src/test/results/clientpositive/tez/transform_ppr1.q.out +++ ql/src/test/results/clientpositive/tez/transform_ppr1.q.out @@ -372,6 +372,7 @@ PREHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 @@ -386,6 +387,7 @@ POSTHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 diff --git ql/src/test/results/clientpositive/tez/transform_ppr2.q.out ql/src/test/results/clientpositive/tez/transform_ppr2.q.out index 9700e45..de8fc90 100644 --- ql/src/test/results/clientpositive/tez/transform_ppr2.q.out +++ ql/src/test/results/clientpositive/tez/transform_ppr2.q.out @@ -281,6 +281,7 @@ PREHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=12 @@ -294,6 +295,7 @@ POSTHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 diff --git ql/src/test/results/clientpositive/transform1.q.out ql/src/test/results/clientpositive/transform1.q.out index a0386ef..9382cf0 100644 --- ql/src/test/results/clientpositive/transform1.q.out +++ ql/src/test/results/clientpositive/transform1.q.out @@ -50,10 +50,12 @@ STAGE PLANS: PREHOOK: query: SELECT transform(*) USING 'cat' AS (col array) FROM transform1_t1 PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@transform1_t1 #### A masked pattern was here #### POSTHOOK: query: SELECT transform(*) USING 'cat' AS (col array) FROM transform1_t1 POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@transform1_t1 #### A masked pattern was here #### PREHOOK: query: create table transform1_t2(col array) @@ -119,10 +121,12 @@ STAGE PLANS: PREHOOK: query: SELECT transform('0\0021\0022') USING 'cat' AS (col array) FROM transform1_t2 PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@transform1_t2 #### A masked pattern was here #### POSTHOOK: query: SELECT transform('0\0021\0022') USING 'cat' AS (col array) FROM transform1_t2 POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@transform1_t2 #### A masked pattern was here #### [0,1,2] diff --git ql/src/test/results/clientpositive/transform_acid.q.out ql/src/test/results/clientpositive/transform_acid.q.out index 704a261..29d0638 100644 --- ql/src/test/results/clientpositive/transform_acid.q.out +++ ql/src/test/results/clientpositive/transform_acid.q.out @@ -23,9 +23,11 @@ POSTHOOK: Lineage: transform_acid.b EXPRESSION [(alltypesorc)alltypesorc.FieldSc PREHOOK: query: SELECT transform(*) USING 'transform_acid_grep.sh' AS (col string) FROM 
transform_acid PREHOOK: type: QUERY PREHOOK: Input: default@transform_acid +PREHOOK: Input: transform_acid_grep.sh #### A masked pattern was here #### POSTHOOK: query: SELECT transform(*) USING 'transform_acid_grep.sh' AS (col string) FROM transform_acid POSTHOOK: type: QUERY POSTHOOK: Input: default@transform_acid +POSTHOOK: Input: transform_acid_grep.sh #### A masked pattern was here #### a diff --git ql/src/test/results/clientpositive/transform_ppr1.q.out ql/src/test/results/clientpositive/transform_ppr1.q.out index 00e62a2..9430d65 100644 --- ql/src/test/results/clientpositive/transform_ppr1.q.out +++ ql/src/test/results/clientpositive/transform_ppr1.q.out @@ -366,6 +366,7 @@ PREHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 @@ -380,6 +381,7 @@ POSTHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 AND tmap.ds = '2008-04-08' POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 diff --git ql/src/test/results/clientpositive/transform_ppr2.q.out ql/src/test/results/clientpositive/transform_ppr2.q.out index 57bf84f..122abba 100644 --- ql/src/test/results/clientpositive/transform_ppr2.q.out +++ ql/src/test/results/clientpositive/transform_ppr2.q.out @@ -275,6 +275,7 @@ PREHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 PREHOOK: type: QUERY +PREHOOK: Input: cat PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 @@ -288,6 +289,7 @@ POSTHOOK: query: FROM ( ) tmap SELECT tmap.tkey, tmap.tvalue WHERE tmap.tkey < 100 POSTHOOK: type: QUERY +POSTHOOK: Input: cat POSTHOOK: 
Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12