diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index a6dd847..968a59e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -1857,7 +1857,7 @@ destination @init { msgs.push("destination specification"); } @after { msgs.pop(); } : - KW_LOCAL KW_DIRECTORY StringLiteral -> ^(TOK_LOCAL_DIR StringLiteral) + KW_LOCAL KW_DIRECTORY StringLiteral tableRowFormat? tableFileFormat? -> ^(TOK_LOCAL_DIR StringLiteral tableRowFormat? tableFileFormat?) | KW_DIRECTORY StringLiteral -> ^(TOK_DIR StringLiteral) | KW_TABLE tableOrPartition -> tableOrPartition ; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index 7dd0426..bcbcabd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.plan.CreateTableDesc; +import org.apache.hadoop.hive.ql.plan.LocalDirectoryDesc; /** * Implementation of the query block. @@ -51,6 +52,7 @@ private boolean isQuery; private boolean isAnalyzeRewrite; private CreateTableDesc tblDesc = null; // table descriptor of the final + private LocalDirectoryDesc localDirectoryDesc = null ; // results @@ -215,6 +217,14 @@ public void setTableDesc(CreateTableDesc desc) { tblDesc = desc; } + public LocalDirectoryDesc getLLocalDirectoryDesc() { + return localDirectoryDesc; + } + + public void setLocalDirectoryDesc(LocalDirectoryDesc localDirectoryDesc) { + this.localDirectoryDesc = localDirectoryDesc; + } + /** * Whether this QB is for a CREATE-TABLE-AS-SELECT. */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index db0eacc..5628fe2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -148,6 +148,7 @@ import org.apache.hadoop.hive.ql.plan.ListBucketingCtx; import org.apache.hadoop.hive.ql.plan.LoadFileDesc; import org.apache.hadoop.hive.ql.plan.LoadTableDesc; +import org.apache.hadoop.hive.ql.plan.LocalDirectoryDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; @@ -1112,6 +1113,10 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException } } + RowFormatParams rowFormatParams = new RowFormatParams(); + AnalyzeCreateCommonVars shared = new AnalyzeCreateCommonVars(); + StorageFormat storageFormat = new StorageFormat(); + LOG.info("Get metadata for destination tables"); // Go over all the destination structures and populate the related // metadata @@ -1199,6 +1204,45 @@ public void getMetaData(QB qb, ReadEntity parentInput) throws SemanticException } qb.getMetaData().setDestForAlias(name, fname, (ast.getToken().getType() == HiveParser.TOK_DIR)); + + LocalDirectoryDesc localDirectoryDesc = new LocalDirectoryDesc(); + boolean localDirectoryDescIsSet = false; + int numCh = ast.getChildCount(); + for (int num = 1; num < numCh ; num++){ + ASTNode child = (ASTNode) ast.getChild(num); + if (ast.getChild(num) != null){ + switch (child.getToken().getType()) { + case HiveParser.TOK_TABLEROWFORMAT: + rowFormatParams.analyzeRowFormat(shared, child); + localDirectoryDesc.setFieldDelim(rowFormatParams.fieldDelim); + localDirectoryDesc.setLineDelim(rowFormatParams.lineDelim); + localDirectoryDesc.setCollItemDelim(rowFormatParams.collItemDelim); + localDirectoryDesc.setMapKeyDelim(rowFormatParams.mapKeyDelim); + localDirectoryDesc.setFieldEscape(rowFormatParams.fieldEscape); + localDirectoryDescIsSet=true; + break; + case HiveParser.TOK_TABLESERIALIZER: + ASTNode serdeChild = (ASTNode) child.getChild(0); + shared.serde = unescapeSQLString(serdeChild.getChild(0).getText()); + localDirectoryDesc.setSerDe(shared.serde); + localDirectoryDescIsSet=true; + break; + case HiveParser.TOK_TBLSEQUENCEFILE: + case HiveParser.TOK_TBLTEXTFILE: + case HiveParser.TOK_TBLRCFILE: + case HiveParser.TOK_TBLORCFILE: + case HiveParser.TOK_TABLEFILEFORMAT: + storageFormat.fillStorageFormat(child, shared); + localDirectoryDesc.setOutputFormat(storageFormat.outputFormat); + localDirectoryDesc.setSerDe(shared.serde); + localDirectoryDescIsSet=true; + break; + } + } + } + if (localDirectoryDescIsSet){ + qb.setLocalDirectoryDesc(localDirectoryDesc); + } break; } default: @@ -5081,8 +5125,7 @@ private Operator genFileSinkPlan(String dest, QB qb, Operator input) String fileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); table_desc = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, fileFormat); } else { - table_desc = PlanUtils.getDefaultTableDesc(Integer - .toString(Utilities.ctrlaCode), cols, colTypes, false); + table_desc = PlanUtils.getDefaultTableDesc(qb.getLLocalDirectoryDesc(), cols, colTypes); } } else { table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LocalDirectoryDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LocalDirectoryDesc.java new file mode 100644 index 0000000..a80136e --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LocalDirectoryDesc.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; + +import org.apache.hadoop.hive.ql.exec.Utilities; + +/** + * LocalDirectoryDesc. + * + */ +public class LocalDirectoryDesc implements Serializable { + private static final long serialVersionUID = 1L; + private String fieldDelim = Integer.toString(Utilities.ctrlaCode); + private String fieldEscape; + private String collItemDelim; + private String mapKeyDelim; + private String lineDelim; + + private String serde; + private String outputFormat; + + public LocalDirectoryDesc() { + } + + + public String getFieldEscape() { + return fieldEscape; + } + + public void setFieldEscape(String fieldEscape) { + this.fieldEscape = fieldEscape; + } + + public String getMapKeyDelim() { + return mapKeyDelim; + } + + public void setMapKeyDelim(String mapKeyDelim) { + this.mapKeyDelim = mapKeyDelim; + } + + public String getLineDelim() { + return lineDelim; + } + + public void setLineDelim(String lineDelim) { + this.lineDelim = lineDelim; + } + + public String getFieldDelim() { + return fieldDelim; + } + + public void setFieldDelim(String fieldDelim) { + this.fieldDelim = fieldDelim; + } + + public String getCollItemDelim() { + return collItemDelim; + } + + public void setCollItemDelim(String collItemDelim) { + this.collItemDelim = collItemDelim; + } + + public void setSerDe(String serde) { + this.serde = serde; + } + + public String getSerDe(){ + return serde; + } + + public String getOutputFormat() { + return outputFormat; + } + + public void setOutputFormat(String outputFormat) { + this.outputFormat = outputFormat; + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index de094e2..2a2470f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -99,6 +99,55 @@ public static MapredWork getMapRedWork() { } } + public static TableDesc getDefaultTableDesc(LocalDirectoryDesc localDirectoryDesc, String cols, String colTypes ) { + TableDesc tableDesc = getDefaultTableDesc(Integer.toString(Utilities.ctrlaCode), cols, + colTypes, false);; + if (localDirectoryDesc == null) { + return tableDesc; + } + + try { + if (localDirectoryDesc.getFieldDelim() != null) { + tableDesc.getProperties().setProperty( + serdeConstants.FIELD_DELIM, localDirectoryDesc.getFieldDelim()); + tableDesc.getProperties().setProperty( + serdeConstants.SERIALIZATION_FORMAT, localDirectoryDesc.getFieldDelim()); + } + if (localDirectoryDesc.getLineDelim() != null) { + tableDesc.getProperties().setProperty( + serdeConstants.LINE_DELIM, localDirectoryDesc.getLineDelim()); + } + if (localDirectoryDesc.getCollItemDelim() != null) { + tableDesc.getProperties().setProperty( + serdeConstants.COLLECTION_DELIM, localDirectoryDesc.getCollItemDelim()); + } + if (localDirectoryDesc.getMapKeyDelim() != null) { + tableDesc.getProperties().setProperty( + serdeConstants.MAPKEY_DELIM, localDirectoryDesc.getMapKeyDelim()); + } + if (localDirectoryDesc.getFieldEscape() !=null) { + tableDesc.getProperties().setProperty( + serdeConstants.ESCAPE_CHAR, localDirectoryDesc.getFieldEscape()); + } + if (localDirectoryDesc.getSerDe() != null) { + tableDesc.setSerdeClassName(localDirectoryDesc.getSerDe()); + tableDesc.getProperties().setProperty( + serdeConstants.SERIALIZATION_LIB, localDirectoryDesc.getSerDe()); + tableDesc.setDeserializerClass( + (Class) Class.forName(localDirectoryDesc.getSerDe())); + } + if (localDirectoryDesc.getOutputFormat() != null){ + tableDesc.setOutputFileFormatClass(Class.forName(localDirectoryDesc.getOutputFormat())); + } + } catch (ClassNotFoundException e) { + // mimicking behaviour in CreateTableDesc tableDesc creation + // returning null table description for output. + e.printStackTrace(); + return null; + } + return tableDesc; + } + /** * Generate the table descriptor of MetadataTypedColumnsetSerDe with the * separatorCode and column names (comma separated string).