diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java index aa5d0bf..40e187c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java @@ -90,14 +90,13 @@ public class ScriptOperator extends Operator implements // of the user assumptions. transient boolean firstRow; - /** - * addJobConfToEnvironment is shamelessly copied from hadoop streaming. - */ - static String safeEnvVarName(String var) { + + String safeEnvVarName(String name) { StringBuilder safe = new StringBuilder(); - int len = var.length(); + int len = name.length(); + for (int i = 0; i < len; i++) { - char c = var.charAt(i); + char c = name.charAt(i); char s; if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { @@ -110,8 +109,30 @@ public class ScriptOperator extends Operator implements return safe.toString(); } - static void addJobConfToEnvironment(Configuration conf, - Map env) { + /** + * Most UNIX implementations impose some limit on the total size of environment variables and + * size of strings. To fit in this limit we need sometimes to truncate strings. + * @param value environment variable value to check + * @param name name of variable (used only for logging purposes) + * @return original value, or truncated one if its length is more than 20KB + * @see Linux + * Man page for more details + */ + String safeEnvVarValue(String value, String name) { + final int len_limit = 20*1024; + if (value.length() > len_limit) { + value = value.substring(0,len_limit); + LOG.warn("Length of environment variable " + name + " was truncated to " + len_limit + + " bytes to fit system limits."); + } + return value; + } + + /** + * addJobConfToEnvironment is mostly shamelessly copied from hadoop streaming. 
Added additional + * check on environment variable length + */ + void addJobConfToEnvironment(Configuration conf, Map env) { Iterator> it = conf.iterator(); while (it.hasNext()) { Map.Entry en = it.next(); @@ -120,6 +141,7 @@ public class ScriptOperator extends Operator implements // expansion String value = conf.get(name); // does variable expansion name = safeEnvVarName(name); + value = safeEnvVarValue(value, name); env.put(name, value); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java index 0ee9aeb..6dcce50 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java @@ -19,9 +19,7 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.Map; +import java.util.*; import junit.framework.TestCase; @@ -185,6 +183,35 @@ public class TestOperators extends TestCase { } } + /** + * When ScriptOperator runs an external script, it passes the job configuration as environment + * variables. But environment variables have some system limitations and we have to check + * job configuration properties first. This test checks that stuff. 
+ */ + public void testScriptOperatorEnvVarsProcessing() throws Throwable { + try { + ScriptOperator scriptOperator = new ScriptOperator(); + + //Environment Variables name + assertEquals("a_b_c",scriptOperator.safeEnvVarName("a.b.c")); + assertEquals("a_b_c",scriptOperator.safeEnvVarName("a-b-c")); + + //Environment Variables value + assertEquals("value",scriptOperator.safeEnvVarValue("value", "name")); + + char [] array = new char[20*1024+1]; + Arrays.fill(array,'a'); + String hugeEnvVar = new String(array); + assertEquals(20*1024+1, hugeEnvVar.length()); + assertEquals(20*1024, scriptOperator.safeEnvVarValue(hugeEnvVar,"name").length()); + + System.out.println("Script Operator Environment Variables processing ok"); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } + public void testScriptOperator() throws Throwable { try { System.out.println("Testing Script Operator");