Index: conf/hive-default.xml
===================================================================
--- conf/hive-default.xml (revision 1038444)
+++ conf/hive-default.xml (working copy)
@@ -786,4 +786,10 @@
Default property values for newly created tables
+
+ hive.variable.substitute
+ true
+ This enables substitution using syntax like ${var} ${system:var} and ${env:var}.
+
+
Index: docs/xdocs/language_manual/var_substitution.xml
===================================================================
--- docs/xdocs/language_manual/var_substitution.xml (revision 0)
+++ docs/xdocs/language_manual/var_substitution.xml (revision 0)
@@ -0,0 +1,130 @@
+
+
+
+
+
+
+ Hadoop Hive- Variable Substitution
+ Hadoop Hive Documentation Team
+
+
+
+Hive Variable Substitution
+
+
+Hive is used both for interactive queries and as part of larger scripted workflows. The hive variable substitution mechanism was
+designed to avoid some of the code that was getting baked into the scripting language on top of hive. For example:
+
+
+
+
+are becoming commonplace. This is frustrating, as hive becomes closely coupled with scripting languages. The hive
+startup time of a couple of seconds is non-trivial when performing thousands of manipulations across multiple hive -e invocations.
+
+
+Hive Variables combine the set capability you know and love with some limited yet powerful (evil laugh) substitution
+ability. For example:
+
+
+
+Results in:
+
+
+
+
+
+
+There are three namespaces for variables: hiveconf, system, and env. hiveconf variables are set as normal:
+
+
+
+However they are retrieved using
+
+
+
+Annotated examples of usage from the test case ql/src/test/queries/clientpositive/set_processor_namespaces.q
+
+
+
+
+
+ Variable substitution is on by default. If this causes an issue with an already existing script, disable it.
+
+
+
+
+
+
+
Index: docs/stylesheets/project.xml
===================================================================
--- docs/stylesheets/project.xml (revision 1038444)
+++ docs/stylesheets/project.xml (working copy)
@@ -29,6 +29,7 @@
+
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1038444)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -314,6 +314,9 @@
// Serde for FetchTask
HIVEFETCHOUTPUTSERDE("hive.fetch.output.serde", "org.apache.hadoop.hive.serde2.DelimitedJSONSerDe"),
+ // Hive Variables
+ HIVEVARIABLESUBSTITUTE("hive.variable.substitute", true),
+
SEMANTIC_ANALYZER_HOOK("hive.semantic.analyzer.hook",null),
;
@@ -621,4 +624,5 @@
public static String getColumnInternalName(int pos) {
return "_col" + pos;
}
+
}
Index: ql/src/test/results/clientpositive/set_processor_namespaces.q.out
===================================================================
--- ql/src/test/results/clientpositive/set_processor_namespaces.q.out (revision 0)
+++ ql/src/test/results/clientpositive/set_processor_namespaces.q.out (revision 0)
@@ -0,0 +1,65 @@
+zzz=5
+system:xxx=5
+system:yyy=5
+go=5
+raw=${hiveconf:zzz}
+PREHOOK: query: EXPLAIN SELECT * FROM src where key=5
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM src where key=5
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 5))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Filter Operator
+ predicate:
+ expr: (key = 5)
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (key = 5)
+ type: boolean
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT * FROM src where key=5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/tmp/edward/hive_2010-11-23_22-04-47_164_8887552189029512035/-mr-10000
+POSTHOOK: query: SELECT * FROM src where key=5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/tmp/edward/hive_2010-11-23_22-04-47_164_8887552189029512035/-mr-10000
+5 val_5
+5 val_5
+5 val_5
+c=1
+../lib/derby.jar
+/home/edward/hive-stats.txt
+/home/edward/hive-stats.txt
Index: ql/src/test/queries/clientpositive/set_processor_namespaces.q
===================================================================
--- ql/src/test/queries/clientpositive/set_processor_namespaces.q (revision 0)
+++ ql/src/test/queries/clientpositive/set_processor_namespaces.q (revision 0)
@@ -0,0 +1,32 @@
+set zzz=5;
+set zzz;
+
+set system:xxx=5;
+set system:xxx;
+
+set system:yyy=${system:xxx};
+set system:yyy;
+
+set go=${hiveconf:zzz};
+set go;
+
+set hive.variable.substitute=false;
+set raw=${hiveconf:zzz};
+set raw;
+
+set hive.variable.substitute=true;
+
+EXPLAIN SELECT * FROM src where key=${hiveconf:zzz};
+SELECT * FROM src where key=${hiveconf:zzz};
+
+set a=1;
+set b=a;
+set c=${hiveconf:${hiveconf:b}};
+set c;
+
+set jar=../lib/derby.jar;
+
+add file ${hiveconf:jar};
+list file;
+delete file ${hiveconf:jar};
+list file;
Index: ql/src/java/org/apache/hadoop/hive/ql/processors/AddResourceProcessor.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/processors/AddResourceProcessor.java (revision 1038444)
+++ ql/src/java/org/apache/hadoop/hive/ql/processors/AddResourceProcessor.java (working copy)
@@ -21,6 +21,7 @@
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
@@ -39,6 +40,7 @@
public CommandProcessorResponse run(String command) {
SessionState ss = SessionState.get();
+ command = new VariableSubstitution().substitute(ss.getConf(),command);
String[] tokens = command.split("\\s+");
SessionState.ResourceType t;
if (tokens.length < 2
Index: ql/src/java/org/apache/hadoop/hive/ql/processors/DeleteResourceProcessor.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/processors/DeleteResourceProcessor.java (revision 1038444)
+++ ql/src/java/org/apache/hadoop/hive/ql/processors/DeleteResourceProcessor.java (working copy)
@@ -21,6 +21,7 @@
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
@@ -38,6 +39,7 @@
public CommandProcessorResponse run(String command) {
SessionState ss = SessionState.get();
+ command = new VariableSubstitution().substitute(ss.getConf(),command);
String[] tokens = command.split("\\s+");
SessionState.ResourceType t;
Index: ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java (revision 1038444)
+++ ql/src/java/org/apache/hadoop/hive/ql/processors/SetProcessor.java (working copy)
@@ -18,11 +18,12 @@
package org.apache.hadoop.hive.ql.processors;
+import java.util.Map;
import java.util.Properties;
-import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
import org.apache.hadoop.hive.ql.session.SessionState;
/**
@@ -32,6 +33,9 @@
public class SetProcessor implements CommandProcessor {
private static String prefix = "set: ";
+ public static final String ENV_PREFIX = "env:";
+ public static final String SYSTEM_PREFIX = "system:";
+ public static final String HIVECONF_PREFIX = "hiveconf:";
public static boolean getBoolean(String value) {
if (value.equals("on") || value.equals("true")) {
@@ -57,6 +61,16 @@
for (Map.Entry entries : sortedMap.entrySet()) {
ss.out.println(entries.getKey() + "=" + entries.getValue());
}
+
+ for (Map.Entry entry : mapToSortedMap(System.getenv()).entrySet()) {
+ ss.out.println(ENV_PREFIX+entry.getKey() + "=" + entry.getValue());
+ }
+
+ for (Map.Entry entry :
+ propertiesToSortedMap(System.getProperties()).entrySet() ) {
+ ss.out.println(SYSTEM_PREFIX+entry.getKey() + "=" + entry.getValue());
+ }
+
}
private void dumpOption(Properties p, String s) {
@@ -72,6 +86,80 @@
public void init() {
}
+ private CommandProcessorResponse setVariable(String varname, String varvalue){
+ SessionState ss = SessionState.get();
+ if (varname.startsWith(SetProcessor.ENV_PREFIX)){
+ ss.err.println("env:* variables can not be set.");
+ return new CommandProcessorResponse(1);
+ } else if (varname.startsWith(SetProcessor.SYSTEM_PREFIX)){
+ String propName = varname.substring(SetProcessor.SYSTEM_PREFIX.length());
+ System.getProperties().setProperty(propName, new VariableSubstitution().substitute(ss.getConf(),varvalue));
+ return new CommandProcessorResponse(0);
+ } else if (varname.startsWith(SetProcessor.HIVECONF_PREFIX)){
+ String propName = varname.substring(SetProcessor.HIVECONF_PREFIX.length());
+ ss.getConf().set(propName, new VariableSubstitution().substitute(ss.getConf(),varvalue));
+ return new CommandProcessorResponse(0);
+ } else {
+ ss.getConf().set(varname, new VariableSubstitution().substitute(ss.getConf(),varvalue) );
+ return new CommandProcessorResponse(0);
+ }
+ }
+
+ // Copies the given Properties into a sorted (TreeMap) view keyed alphabetically.
+ private SortedMap propertiesToSortedMap(Properties p){
+ SortedMap sortedPropMap = new TreeMap();
+ // Iterate the supplied Properties argument (previously this mistakenly
+ // re-read System.getProperties(), ignoring the parameter entirely).
+ for (Map.Entry entry : p.entrySet() ){
+ sortedPropMap.put( (String) entry.getKey(), (String) entry.getValue());
+ }
+ return sortedPropMap;
+ }
+
+ private SortedMap mapToSortedMap(Map data){
+ SortedMap sortedEnvMap = new TreeMap();
+ sortedEnvMap.putAll( data );
+ return sortedEnvMap;
+ }
+
+
+ private CommandProcessorResponse getVariable(String varname){
+ SessionState ss = SessionState.get();
+ if (varname.equals("silent")){
+ ss.out.println("silent" + "=" + ss.getIsSilent());
+ return new CommandProcessorResponse(0);
+ }
+ if (varname.startsWith(SetProcessor.SYSTEM_PREFIX)){
+ String propName = varname.substring(SetProcessor.SYSTEM_PREFIX.length());
+ String result = System.getProperty(propName);
+ if (result != null){
+ ss.out.println(SetProcessor.SYSTEM_PREFIX+propName + "=" + result);
+ return new CommandProcessorResponse(0);
+ } else {
+ ss.out.println( propName + " is undefined as a system property");
+ return new CommandProcessorResponse(1);
+ }
+ } else if (varname.indexOf(SetProcessor.ENV_PREFIX)==0){
+ String var = varname.substring(ENV_PREFIX.length());
+ if (System.getenv(var)!=null){
+ ss.out.println(SetProcessor.ENV_PREFIX+var + "=" + System.getenv(var));
+ return new CommandProcessorResponse(0);
+ } else {
+ ss.out.println(varname + " is undefined as an environmental variable");
+ return new CommandProcessorResponse(1);
+ }
+ } else if (varname.indexOf(SetProcessor.HIVECONF_PREFIX)==0) {
+ String var = varname.substring(SetProcessor.HIVECONF_PREFIX.length());
+ if (ss.getConf().get(var)!=null){
+ ss.out.println(SetProcessor.HIVECONF_PREFIX+var + "=" + ss.getConf().get(var));
+ return new CommandProcessorResponse(0);
+ } else {
+ ss.out.println(varname + " is undefined as a hive configuration variable");
+ return new CommandProcessorResponse(1);
+ }
+ } else {
+ dumpOption(ss.getConf().getAllProperties(), varname);
+ return new CommandProcessorResponse(0);
+ }
+ }
+
public CommandProcessorResponse run(String command) {
SessionState ss = SessionState.get();
@@ -87,31 +175,24 @@
}
String[] part = new String[2];
-
int eqIndex = nwcmd.indexOf('=');
- if (eqIndex == -1) {
- // no equality sign - print the property out
- dumpOption(ss.getConf().getAllProperties(), nwcmd);
- return new CommandProcessorResponse(0);
- } else if (eqIndex == nwcmd.length() - 1) {
- part[0] = nwcmd.substring(0, nwcmd.length() - 1);
- part[1] = "";
- } else {
- part[0] = nwcmd.substring(0, eqIndex).trim();
- part[1] = nwcmd.substring(eqIndex + 1).trim();
- }
- try {
+ if (nwcmd.contains("=")){
+ if (eqIndex == nwcmd.length() - 1) { //x=
+ part[0] = nwcmd.substring(0, nwcmd.length() - 1);
+ part[1] = "";
+ } else { //x=y
+ part[0] = nwcmd.substring(0, eqIndex).trim();
+ part[1] = nwcmd.substring(eqIndex + 1).trim();
+ }
if (part[0].equals("silent")) {
- boolean val = getBoolean(part[1]);
- ss.setIsSilent(val);
- } else {
- ss.getConf().set(part[0], part[1]);
+ ss.setIsSilent(getBoolean(part[1]));
+ return new CommandProcessorResponse(0);
}
- } catch (IllegalArgumentException err) {
- ss.err.println(err.getMessage());
- return new CommandProcessorResponse(1);
+ return setVariable(part[0],part[1]);
+ } else {
+ return getVariable(nwcmd);
}
- return new CommandProcessorResponse(0);
+
}
}
Index: ql/src/java/org/apache/hadoop/hive/ql/processors/DfsProcessor.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/processors/DfsProcessor.java (revision 1038444)
+++ ql/src/java/org/apache/hadoop/hive/ql/processors/DfsProcessor.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
@@ -46,10 +47,13 @@
}
public CommandProcessorResponse run(String command) {
- String[] tokens = command.split("\\s+");
+
try {
SessionState ss = SessionState.get();
+ command = new VariableSubstitution().substitute(ss.getConf(),command);
+
+ String[] tokens = command.split("\\s+");
PrintStream oldOut = System.out;
if (ss != null && ss.out != null) {
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/VariableSubstitution.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/VariableSubstitution.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/VariableSubstitution.java (revision 0)
@@ -0,0 +1,65 @@
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.processors.SetProcessor;
+
+public class VariableSubstitution {
+
+ private static final Log l4j = LogFactory.getLog(VariableSubstitution.class);
+ protected static Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
+ protected static int MAX_SUBST = 40;
+
+ public String substitute (HiveConf conf, String expr) {
+
+ if (conf.getBoolVar(ConfVars.HIVEVARIABLESUBSTITUTE)){
+ l4j.debug("Substitution is on: "+expr);
+ } else {
+ return expr;
+ }
+ if (expr == null) {
+ return null;
+ }
+ Matcher match = varPat.matcher("");
+ String eval = expr;
+ for(int s=0; s