Index: conf/hive-default.xml
===================================================================
--- conf/hive-default.xml (revision 4427)
+++ conf/hive-default.xml (working copy)
@@ -293,6 +293,13 @@
+ hive.script.operator.id.env.var
+ HIVE_SCRIPT_OPERATOR_ID
+ Name of the environment variable that holds the unique ID of the script operator. It is set in the environment of the user's transform script (the custom mapper/reducer that the user has specified in the query).
+
+
+
+
hive.exec.compress.output
false
This controls whether the final outputs of a query (to a local/hdfs file or a hive table) is compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress*
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 4427)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -119,6 +119,7 @@
HIVETABLENAME("hive.table.name", ""),
HIVEPARTITIONNAME("hive.partition.name", ""),
HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false),
+ HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID"),
HIVEMAPREDMODE("hive.mapred.mode", "nonstrict"),
HIVEALIAS("hive.alias", ""),
HIVEMAPSIDEAGGREGATE("hive.map.aggr", "true"),
@@ -129,7 +130,7 @@
HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000),
HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float)0.5),
HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float)0.5),
-
+
// for hive udtf operator
HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),
Index: ql/src/test/results/clientpositive/script_env_var2.q.out
===================================================================
--- ql/src/test/results/clientpositive/script_env_var2.q.out (revision 0)
+++ ql/src/test/results/clientpositive/script_env_var2.q.out (revision 0)
@@ -0,0 +1,16 @@
+PREHOOK: query: -- Same test as script_env_var1, but test setting the variable name
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1036835170/10000
+POSTHOOK: query: -- Same test as script_env_var1, but test setting the variable name
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1036835170/10000
+1
+1
Index: ql/src/test/results/clientpositive/script_env_var1.q.out
===================================================================
--- ql/src/test/results/clientpositive/script_env_var1.q.out (revision 0)
+++ ql/src/test/results/clientpositive/script_env_var1.q.out (revision 0)
@@ -0,0 +1,18 @@
+PREHOOK: query: -- Verifies that script operator ID environment variables have unique values
+-- in each instance of the script operator.
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1389301646/10000
+POSTHOOK: query: -- Verifies that script operator ID environment variables have unique values
+-- in each instance of the script operator.
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1389301646/10000
+1
+1
Index: ql/src/test/queries/clientpositive/script_env_var2.q
===================================================================
--- ql/src/test/queries/clientpositive/script_env_var2.q (revision 0)
+++ ql/src/test/queries/clientpositive/script_env_var2.q (revision 0)
@@ -0,0 +1,5 @@
+set hive.script.operator.id.env.var = MY_ID;
+-- Same test as script_env_var1, but test setting the variable name
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key;
Index: ql/src/test/queries/clientpositive/script_env_var1.q
===================================================================
--- ql/src/test/queries/clientpositive/script_env_var1.q (revision 0)
+++ ql/src/test/queries/clientpositive/script_env_var1.q (revision 0)
@@ -0,0 +1,5 @@
+-- Verifies that script operator ID environment variables have unique values
+-- in each instance of the script operator.
+SELECT count(1) FROM
+( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL
+ SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key;
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java (revision 4427)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java (working copy)
@@ -250,6 +250,12 @@
Map env = pb.environment();
addJobConfToEnvironment(hconf, env);
env.put(safeEnvVarName(HiveConf.ConfVars.HIVEALIAS.varname), String.valueOf(alias));
+
+ // Create an environment variable that uniquely identifies this script operator
+ String idEnvVarName = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVESCRIPTIDENVVAR);
+ String idEnvVarVal = this.getOperatorId();
+ env.put(safeEnvVarName(idEnvVarName), idEnvVarVal);
+
scriptPid = pb.start(); // Runtime.getRuntime().exec(wrappedCmdArgs);
DataOutputStream scriptOut = new DataOutputStream(new BufferedOutputStream(scriptPid.getOutputStream()));