diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b7e9d1f..04a42a9 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -338,6 +338,7 @@ public class HiveConf extends Configuration {
HIVEPARTITIONNAME("hive.partition.name", ""),
HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false),
HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID"),
+ HIVESCRIPTTRUNCATEENV("hive.script.operator.truncate.env", false),
HIVEMAPREDMODE("hive.mapred.mode", "nonstrict"),
HIVEALIAS("hive.alias", ""),
HIVEMAPSIDEAGGREGATE("hive.map.aggr", true),
diff --git conf/hive-default.xml.template conf/hive-default.xml.template
index 9ada1e3..3b7eb75 100644
--- conf/hive-default.xml.template
+++ conf/hive-default.xml.template
@@ -489,6 +489,12 @@
+ hive.script.operator.truncate.env
+ false
+ Truncate each environment variable passed to the external script in the script operator to 20KB (to fit system limits)
+
+
+
hive.exec.compress.output
false
This controls whether the final outputs of a query (to a local/hdfs file or a hive table) is compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress*
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java
index aa5d0bf..1d19030 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java
@@ -90,14 +90,13 @@ public class ScriptOperator extends Operator implements
// of the user assumptions.
transient boolean firstRow;
- /**
- * addJobConfToEnvironment is shamelessly copied from hadoop streaming.
- */
- static String safeEnvVarName(String var) {
+
+ String safeEnvVarName(String name) {
StringBuilder safe = new StringBuilder();
- int len = var.length();
+ int len = name.length();
+
for (int i = 0; i < len; i++) {
- char c = var.charAt(i);
+ char c = name.charAt(i);
char s;
if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z')
|| (c >= 'a' && c <= 'z')) {
@@ -110,8 +109,32 @@ public class ScriptOperator extends Operator implements
return safe.toString();
}
- static void addJobConfToEnvironment(Configuration conf,
- Map env) {
+ /**
+ * Most UNIX implementations impose a limit on the total size of the environment and on the
+ * size of each individual string. To fit within that limit we sometimes need to truncate values.
+ * @param value environment variable value to check
+ * @param name name of the variable (used only in the warning logged on truncation)
+ * @param truncate whether an over-long value should be truncated
+ * @return the original value, or its first 20*1024 chars if its length is more than that and
+ * the truncate flag is set
+ * NOTE(review): the limit is compared against String.length(), i.e. chars rather than bytes.
+ * @see "Linux execve(2) man page (limits on size of arguments and environment)"
+ */
+ String safeEnvVarValue(String value, String name, boolean truncate) {
+ final int lenLimit = 20*1024; // maximum value length, measured with String.length()
+ if (truncate && value.length() > lenLimit) {
+ value = value.substring(0, lenLimit); // keep only the first lenLimit chars
+ LOG.warn("Length of environment variable " + name + " was truncated to " + lenLimit
+ + " bytes to fit system limits.");
+ }
+ return value;
+ }
+
+ /**
+ * addJobConfToEnvironment is mostly shamelessly copied from Hadoop streaming, with an additional
+ * check on the length of each environment variable value.
+ */
+ void addJobConfToEnvironment(Configuration conf, Map env) {
Iterator> it = conf.iterator();
while (it.hasNext()) {
Map.Entry en = it.next();
@@ -120,6 +143,8 @@ public class ScriptOperator extends Operator implements
// expansion
String value = conf.get(name); // does variable expansion
name = safeEnvVarName(name);
+ boolean truncate = conf.getBoolean(HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV.toString(), false);
+ value = safeEnvVarValue(value, name, truncate);
env.put(name, value);
}
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
index 0ee9aeb..1e0b460 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
@@ -19,9 +19,7 @@
package org.apache.hadoop.hive.ql.exec;
import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.LinkedHashMap;
-import java.util.Map;
+import java.util.*;
import junit.framework.TestCase;
@@ -185,6 +183,51 @@ public class TestOperators extends TestCase {
}
}
+ /**
+ * When ScriptOperator runs an external script, it passes the job configuration as environment
+ * variables. Environment variables are subject to system limits, so the configuration
+ * properties have to be sanitized first. This test checks that name and value processing.
+ */
+ public void testScriptOperatorEnvVarsProcessing() throws Throwable {
+ try {
+ ScriptOperator scriptOperator = new ScriptOperator();
+
+ // Variable names: '.' and '-' are expected to be replaced with '_'
+ assertEquals("a_b_c", scriptOperator.safeEnvVarName("a.b.c"));
+ assertEquals("a_b_c", scriptOperator.safeEnvVarName("a-b-c"));
+
+ // Short values are returned unchanged whether or not truncation is enabled
+ assertEquals("value", scriptOperator.safeEnvVarValue("value", "name", false));
+ assertEquals("value", scriptOperator.safeEnvVarValue("value", "name", true));
+
+ // A value one char over the 20*1024 limit is cut to the limit only when truncation is enabled
+ char [] array = new char[20*1024+1];
+ Arrays.fill(array, 'a');
+ String hugeEnvVar = new String(array);
+ assertEquals(20*1024+1, hugeEnvVar.length());
+ assertEquals(20*1024+1, scriptOperator.safeEnvVarValue(hugeEnvVar, "name", false).length());
+ assertEquals(20*1024, scriptOperator.safeEnvVarValue(hugeEnvVar, "name", true).length());
+
+ // Full test: the same checks through addJobConfToEnvironment, driven by the config flag
+ Configuration hconf = new JobConf(ScriptOperator.class);
+ hconf.set("name", hugeEnvVar);
+ Map env = new HashMap();
+
+ HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV, false);
+ scriptOperator.addJobConfToEnvironment(hconf, env);
+ assertEquals(20*1024+1, env.get("name").length());
+
+ HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV, true);
+ scriptOperator.addJobConfToEnvironment(hconf, env); // overwrites the earlier "name" entry
+ assertEquals(20*1024, env.get("name").length());
+
+ System.out.println("Script Operator Environment Variables processing ok");
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
public void testScriptOperator() throws Throwable {
try {
System.out.println("Testing Script Operator");