diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index b7e9d1f..04a42a9 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -338,6 +338,7 @@ public class HiveConf extends Configuration { HIVEPARTITIONNAME("hive.partition.name", ""), HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false), HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID"), + HIVESCRIPTTRUNCATEENV("hive.script.operator.truncate.env", false), HIVEMAPREDMODE("hive.mapred.mode", "nonstrict"), HIVEALIAS("hive.alias", ""), HIVEMAPSIDEAGGREGATE("hive.map.aggr", true), diff --git conf/hive-default.xml.template conf/hive-default.xml.template index 9ada1e3..3b7eb75 100644 --- conf/hive-default.xml.template +++ conf/hive-default.xml.template @@ -489,6 +489,12 @@ + hive.script.operator.truncate.env + false + Truncate each environment variable for external script in scripts operator to 20KB (to fit system limits) + + + hive.exec.compress.output false This controls whether the final outputs of a query (to a local/hdfs file or a hive table) is compressed. The compression codec and other options are determined from hadoop config variables mapred.output.compress* diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java index aa5d0bf..1d19030 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java @@ -90,14 +90,13 @@ public class ScriptOperator extends Operator implements // of the user assumptions. transient boolean firstRow; - /** - * addJobConfToEnvironment is shamelessly copied from hadoop streaming. 
- */ - static String safeEnvVarName(String var) { + + String safeEnvVarName(String name) { StringBuilder safe = new StringBuilder(); - int len = var.length(); + int len = name.length(); + for (int i = 0; i < len; i++) { - char c = var.charAt(i); + char c = name.charAt(i); char s; if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { @@ -110,8 +109,32 @@ public class ScriptOperator extends Operator implements return safe.toString(); } - static void addJobConfToEnvironment(Configuration conf, - Map env) { + /** + * Most UNIX implementations impose some limit on the total size of environment variables and + * size of strings. To fit within this limit we sometimes need to truncate strings. + * @param value environment variable value to check + * @param name name of variable (used only for logging purposes) + * @param truncate truncate value or not + * @return original value, or truncated one if its length is more than 20KB and + * truncate flag is set + * @see Linux + * Man page for more details + */ + String safeEnvVarValue(String value, String name, boolean truncate) { + final int lenLimit = 20*1024; + if (truncate && value.length() > lenLimit) { + value = value.substring(0, lenLimit); + LOG.warn("Length of environment variable " + name + " was truncated to " + lenLimit + + " bytes to fit system limits."); + } + return value; + } + + /** + * addJobConfToEnvironment is mostly shamelessly copied from hadoop streaming. 
Added an additional + * check on environment variable length + */ + void addJobConfToEnvironment(Configuration conf, Map env) { Iterator> it = conf.iterator(); while (it.hasNext()) { Map.Entry en = it.next(); @@ -120,6 +143,8 @@ public class ScriptOperator extends Operator implements // expansion String value = conf.get(name); // does variable expansion name = safeEnvVarName(name); + boolean truncate = conf.getBoolean(HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV.toString(), false); + value = safeEnvVarValue(value, name, truncate); env.put(name, value); } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java index 0ee9aeb..1e0b460 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java @@ -19,9 +19,7 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; -import java.util.ArrayList; -import java.util.LinkedHashMap; -import java.util.Map; +import java.util.*; import junit.framework.TestCase; @@ -185,6 +183,51 @@ public class TestOperators extends TestCase { } } + /** + * When ScriptOperator runs an external script, it passes job configuration as environment + * variables. But environment variables have some system limitations and we have to check + * job configuration properties first. This test checks that behavior. 
+ */ + public void testScriptOperatorEnvVarsProcessing() throws Throwable { + try { + ScriptOperator scriptOperator = new ScriptOperator(); + + //Environment Variables name + assertEquals("a_b_c", scriptOperator.safeEnvVarName("a.b.c")); + assertEquals("a_b_c", scriptOperator.safeEnvVarName("a-b-c")); + + //Environment Variables short values + assertEquals("value", scriptOperator.safeEnvVarValue("value", "name", false)); + assertEquals("value", scriptOperator.safeEnvVarValue("value", "name", true)); + + //Environment Variables long values + char [] array = new char[20*1024+1]; + Arrays.fill(array, 'a'); + String hugeEnvVar = new String(array); + assertEquals(20*1024+1, hugeEnvVar.length()); + assertEquals(20*1024+1, scriptOperator.safeEnvVarValue(hugeEnvVar, "name", false).length()); + assertEquals(20*1024, scriptOperator.safeEnvVarValue(hugeEnvVar, "name", true).length()); + + //Full test + Configuration hconf = new JobConf(ScriptOperator.class); + hconf.set("name", hugeEnvVar); + Map env = new HashMap(); + + HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV, false); + scriptOperator.addJobConfToEnvironment(hconf, env); + assertEquals(20*1024+1, env.get("name").length()); + + HiveConf.setBoolVar(hconf, HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV, true); + scriptOperator.addJobConfToEnvironment(hconf, env); + assertEquals(20*1024, env.get("name").length()); + + System.out.println("Script Operator Environment Variables processing ok"); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } + public void testScriptOperator() throws Throwable { try { System.out.println("Testing Script Operator");