Index: conf/hive-default.xml =================================================================== --- conf/hive-default.xml (revision 4427) +++ conf/hive-default.xml (working copy) @@ -293,6 +293,13 @@ + hive.script.operator.id.env.var + HIVE_SCRIPT_OPERATOR_ID + Name of the environment variable that holds the unique script operator ID in the user's transform function (the custom mapper/reducer that the user has specified in the query) + + + + hive.exec.compress.output false This controls whether the final outputs of a query (to a local/hdfs file or a hive table) is compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress* Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 4427) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -119,6 +119,7 @@ HIVETABLENAME("hive.table.name", ""), HIVEPARTITIONNAME("hive.partition.name", ""), HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false), + HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID"), HIVEMAPREDMODE("hive.mapred.mode", "nonstrict"), HIVEALIAS("hive.alias", ""), HIVEMAPSIDEAGGREGATE("hive.map.aggr", "true"), @@ -129,7 +130,7 @@ HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000), HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float)0.5), HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float)0.5), - + // for hive udtf operator HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false), Index: ql/src/test/results/clientpositive/script_env_var2.q.out =================================================================== --- ql/src/test/results/clientpositive/script_env_var2.q.out (revision 0) +++ ql/src/test/results/clientpositive/script_env_var2.q.out (revision 0) @@ -0,0 +1,16 @@ +PREHOOK: query: -- Same test as script_env_var1, but test setting the variable name +SELECT count(1) FROM +( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1036835170/10000 +POSTHOOK: query: -- Same test as script_env_var1, but test setting the variable name +SELECT count(1) FROM +( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1036835170/10000 +1 +1 Index: ql/src/test/results/clientpositive/script_env_var1.q.out =================================================================== --- ql/src/test/results/clientpositive/script_env_var1.q.out (revision 0) +++ ql/src/test/results/clientpositive/script_env_var1.q.out (revision 0) @@ -0,0 +1,18 @@ +PREHOOK: query: -- Verifies that script operator ID environment variables have unique values +-- in each instance of the script operator. +SELECT count(1) FROM +( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1389301646/10000 +POSTHOOK: query: -- Verifies that script operator ID environment variables have unique values +-- in each instance of the script operator. +SELECT count(1) FROM +( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/pyang/hive-946/trunk/VENDOR.hive/trunk/build/ql/tmp/1389301646/10000 +1 +1 Index: ql/src/test/queries/clientpositive/script_env_var2.q =================================================================== --- ql/src/test/queries/clientpositive/script_env_var2.q (revision 0) +++ ql/src/test/queries/clientpositive/script_env_var2.q (revision 0) @@ -0,0 +1,5 @@ +set hive.script.operator.id.env.var = MY_ID; +-- Same test as script_env_var1, but test setting the variable name +SELECT count(1) FROM +( SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $MY_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key; Index: ql/src/test/queries/clientpositive/script_env_var1.q =================================================================== --- ql/src/test/queries/clientpositive/script_env_var1.q (revision 0) +++ ql/src/test/queries/clientpositive/script_env_var1.q (revision 0) @@ -0,0 +1,5 @@ +-- Verifies that script operator ID environment variables have unique values +-- in each instance of the script operator. +SELECT count(1) FROM +( SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 UNION ALL + SELECT TRANSFORM('echo $HIVE_SCRIPT_OPERATOR_ID') USING 'bash' AS key FROM src LIMIT 1 ) a GROUP BY key; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java (revision 4427) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java (working copy) @@ -250,6 +250,12 @@ Map env = pb.environment(); addJobConfToEnvironment(hconf, env); env.put(safeEnvVarName(HiveConf.ConfVars.HIVEALIAS.varname), String.valueOf(alias)); + + // Create an environment variable that uniquely identifies this script operator + String idEnvVarName = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVESCRIPTIDENVVAR); + String idEnvVarVal = this.getOperatorId(); + env.put(safeEnvVarName(idEnvVarName), idEnvVarVal); + scriptPid = pb.start(); // Runtime.getRuntime().exec(wrappedCmdArgs); DataOutputStream scriptOut = new DataOutputStream(new BufferedOutputStream(scriptPid.getOutputStream()));