diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java index 5b857e2..307bc74 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ScriptOperator.java @@ -142,17 +142,27 @@ String safeEnvVarValue(String value, String name, boolean truncate) { * check on environment variable length */ void addJobConfToEnvironment(Configuration conf, Map env) { + Map props = getConf().getScriptProps(); Iterator> it = conf.iterator(); while (it.hasNext()) { Map.Entry en = it.next(); String name = en.getKey(); - // String value = (String)en.getValue(); // does not apply variable - // expansion - String value = conf.get(name); // does variable expansion - name = safeEnvVarName(name); - boolean truncate = conf.getBoolean(HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV.toString(), false); - value = safeEnvVarValue(value, name, truncate); - env.put(name, value); + if (props == null || (props.containsKey(name) && props.get(name) == null)) { + // String value = (String)en.getValue(); // does not apply variable + // expansion + String value = conf.get(name); // does variable expansion + name = safeEnvVarName(name); + boolean truncate = conf.getBoolean(HiveConf.ConfVars.HIVESCRIPTTRUNCATEENV.toString(), false); + value = safeEnvVarValue(value, name, truncate); + env.put(name, value); + } + } + if (props != null) { + for (Map.Entry entry : props.entrySet()) { + if (entry.getValue() != null) { + env.put(entry.getKey(), entry.getValue()); + } + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 75394f3..b21f35b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -488,7 +488,7 @@ public static void readProps( String key = 
unescapeSQLString(prop.getChild(propChild).getChild(0) .getText()); String value = null; - if (prop.getChild(propChild).getChild(1) != null) { + if (prop.getChild(propChild).getChild(1) != null && prop.getChild(propChild).getChild(1).getType() != HiveParser.TOK_NULL) { value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText()); } mapProp.put(key, value); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g index 1855d7f..774e457 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g @@ -72,9 +72,30 @@ selectTrfmClause LPAREN selectExpressionList RPAREN inSerde=rowFormat inRec=recordWriter KW_USING StringLiteral - ( KW_AS ((LPAREN (aliasList | columnNameTypeList) RPAREN) | (aliasList | columnNameTypeList)))? + ( KW_AS ((LPAREN (aliasList | columnNameTypeList) RPAREN) | (aliasList | columnNameTypeList)))? (KW_WITH scriptProperties)? outSerde=rowFormat outRec=recordReader - -> ^(TOK_TRANSFORM selectExpressionList $inSerde $inRec StringLiteral $outSerde $outRec aliasList? columnNameTypeList?) + -> ^(TOK_TRANSFORM selectExpressionList $inSerde $inRec StringLiteral $outSerde $outRec aliasList? columnNameTypeList? scriptProperties?)
+ ; + +scriptProperties +@init { gParent.pushMsg("script properties", state); } +@after { gParent.popMsg(state); } + : + LPAREN scriptPropertiesList RPAREN -> ^(TOK_TABLEPROPERTIES scriptPropertiesList) + ; + +scriptPropertiesList +@init { gParent.pushMsg("script properties list", state); } +@after { gParent.popMsg(state); } + : + scriptProperty (COMMA scriptProperty)* -> ^(TOK_TABLEPROPLIST scriptProperty+) + ; + +scriptProperty +@init { gParent.pushMsg("script property", state); } +@after { gParent.popMsg(state); } + : + keyValueProperty | keyProperty ; hintClause diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6cdaedb..7edb3f9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -2557,20 +2557,13 @@ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) ArrayList outputCols = new ArrayList(); int inputSerDeNum = 1, inputRecordWriterNum = 2; int outputSerDeNum = 4, outputRecordReaderNum = 5; - int outputColsNum = 6; - boolean outputColNames = false, outputColSchemas = false; int execPos = 3; boolean defaultOutputCols = false; - // Go over all the children - if (trfm.getChildCount() > outputColsNum) { - ASTNode outCols = (ASTNode) trfm.getChild(outputColsNum); - if (outCols.getType() == HiveParser.TOK_ALIASLIST) { - outputColNames = true; - } else if (outCols.getType() == HiveParser.TOK_TABCOLLIST) { - outputColSchemas = true; - } - } + ASTNode aliasList = (ASTNode) trfm.getFirstChildWithType(HiveParser.TOK_ALIASLIST); + ASTNode tabColList = (ASTNode) trfm.getFirstChildWithType(HiveParser.TOK_TABCOLLIST); + boolean outputColNames = aliasList != null; + boolean outputColSchemas = tabColList != null; // If column type is not specified, use a string if (!outputColNames && !outputColSchemas) { @@ -2586,14 +2579,11 @@ private Operator genScriptPlan(ASTNode trfm, 
QB qb, Operator input) outputCols.add(colInfo); defaultOutputCols = true; } else { - ASTNode collist = (ASTNode) trfm.getChild(outputColsNum); - int ccount = collist.getChildCount(); Set colAliasNamesDuplicateCheck = new HashSet(); - if (outputColNames) { - for (int i = 0; i < ccount; ++i) { - String colAlias = unescapeIdentifier(((ASTNode) collist.getChild(i)) - .getText()); + if (aliasList != null) { + for (int i = 0; i < aliasList.getChildCount(); ++i) { + String colAlias = unescapeIdentifier(aliasList.getChild(i).getText()); failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); String intName = getColumnInternalName(i); ColumnInfo colInfo = new ColumnInfo(intName, @@ -2602,11 +2592,10 @@ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) outputCols.add(colInfo); } } else { - for (int i = 0; i < ccount; ++i) { - ASTNode child = (ASTNode) collist.getChild(i); + for (int i = 0; i < tabColList.getChildCount(); ++i) { + ASTNode child = (ASTNode) tabColList.getChild(i); assert child.getType() == HiveParser.TOK_TABCOL; - String colAlias = unescapeIdentifier(((ASTNode) child.getChild(0)) - .getText()); + String colAlias = unescapeIdentifier(child.getChild(0).getText()); failIfColAliasExists(colAliasNamesDuplicateCheck, colAlias); String intName = getColumnInternalName(i); ColumnInfo colInfo = new ColumnInfo(intName, TypeInfoUtils @@ -2702,10 +2691,17 @@ private Operator genScriptPlan(ASTNode trfm, QB qb, Operator input) .getChild(inputRecordWriterNum)); Class errRecordReader = getDefaultRecordReader(); + ScriptDesc scriptDesc = new ScriptDesc( + fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), + inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo); + + ASTNode propsAST = (ASTNode) trfm.getFirstChildWithType(HiveParser.TOK_TABLEPROPERTIES); + if (propsAST != null) { + scriptDesc.setScriptProps(DDLSemanticAnalyzer.getProps((ASTNode) propsAST.getChild(0))); + } + Operator output = 
putOpInsertMap(OperatorFactory.getAndMakeChild( - new ScriptDesc( - fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), - inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), + scriptDesc, new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); output.setColumnExprMap(new HashMap()); // disable backtracking diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ScriptDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ScriptDesc.java index 4f7c0da..f06a57a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ScriptDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ScriptDesc.java @@ -21,6 +21,8 @@ import org.apache.hadoop.hive.ql.exec.RecordReader; import org.apache.hadoop.hive.ql.exec.RecordWriter; +import java.util.HashMap; + /** * ScriptDesc. * @@ -39,6 +41,7 @@ private TableDesc scriptErrInfo; private Class errRecordReaderClass; + private HashMap scriptProps; public ScriptDesc() { } @@ -141,4 +144,12 @@ public void setInRecordWriterClass( this.inRecordWriterClass = inRecordWriterClass; } + public void setScriptProps(HashMap scriptProps) { + this.scriptProps = scriptProps; + } + + @Explain(displayName = "environments") + public HashMap getScriptProps() { + return scriptProps; + } } diff --git ql/src/test/queries/clientpositive/script_env_var3.q ql/src/test/queries/clientpositive/script_env_var3.q new file mode 100644 index 0000000..4642dec --- /dev/null +++ ql/src/test/queries/clientpositive/script_env_var3.q @@ -0,0 +1,11 @@ +set X=someX; +set Y=someY; + +explain +SELECT TRANSFORM('echo ${X:-x} ${Y:-y} ${Z:-z}') USING 'sh' AS x,y,z WITH ('Z'='someZ', 'Y') +ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' +FROM src tablesample (1 rows); + +SELECT TRANSFORM('echo ${X:-x} ${Y:-y} ${Z:-z}') USING 'sh' AS x,y,z WITH ('Z'='someZ', 'Y') +ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' +FROM src tablesample (1 rows); diff --git ql/src/test/results/clientpositive/script_env_var3.q.out 
ql/src/test/results/clientpositive/script_env_var3.q.out new file mode 100644 index 0000000..1783124 --- /dev/null +++ ql/src/test/results/clientpositive/script_env_var3.q.out @@ -0,0 +1,63 @@ +PREHOOK: query: explain +SELECT TRANSFORM('echo ${X:-x} ${Y:-y} ${Z:-z}') USING 'sh' AS x,y,z WITH ('Z'='someZ', 'Y') +ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' +FROM src tablesample (1 rows) +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT TRANSFORM('echo ${X:-x} ${Y:-y} ${Z:-z}') USING 'sh' AS x,y,z WITH ('Z'='someZ', 'Y') +ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' +FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: 'echo ${X:-x} ${Y:-y} ${Z:-z}' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + Transform Operator + command: sh + output info: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + environments: + Y + Z someZ + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 5812 Basic stats: PARTIAL Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT TRANSFORM('echo ${X:-x} ${Y:-y} ${Z:-z}') USING 'sh' AS x,y,z WITH ('Z'='someZ', 'Y') +ROW 
FORMAT DELIMITED FIELDS TERMINATED BY ' ' +FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT TRANSFORM('echo ${X:-x} ${Y:-y} ${Z:-z}') USING 'sh' AS x,y,z WITH ('Z'='someZ', 'Y') +ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' +FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +x someY someZ