diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 508e651..2f3620f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -544,13 +544,8 @@ public class DDLTask extends Task implements Serializable { if (crtTbl.getLocation() != null) tblStorDesc.setLocation(crtTbl.getLocation()); - if (crtTbl.isSequenceFile()) { - tbl.setInputFormatClass(SequenceFileInputFormat.class); - tbl.setOutputFormatClass(SequenceFileOutputFormat.class); - } else { - tbl.setOutputFormatClass(IgnoreKeyTextOutputFormat.class); - tbl.setInputFormatClass(TextInputFormat.class); - } + tbl.setInputFormatClass(crtTbl.getInputFormat()); + tbl.setOutputFormatClass(crtTbl.getOutputFormat()); if (crtTbl.isExternal()) tbl.setProperty("EXTERNAL", "TRUE"); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index f23def7..2850a90 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -64,6 +64,10 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { TokenToTypeName.put(HiveParser.TOK_DATETIME, Constants.DATETIME_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_TIMESTAMP, Constants.TIMESTAMP_TYPE_NAME); } + private static final String TEXTFILE_INPUT = "org.apache.hadoop.mapred.TextInputFormat"; + private static final String TEXTFILE_OUTPUT = "org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat"; + private static final String SEQUENCEFILE_INPUT = "org.apache.hadoop.mapred.SequenceFileInputFormat"; + private static final String SEQUENCEFILE_OUTPUT = "org.apache.hadoop.mapred.SequenceFileOutputFormat"; public static String getTypeName(int token) { return TokenToTypeName.get(token); @@ -129,12 +133,17 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { String mapKeyDelim = null; String lineDelim = null; String comment = null; - boolean isSequenceFile = - "SequenceFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT)); + String inputFormat = TEXTFILE_INPUT; + String outputFormat = TEXTFILE_OUTPUT; String location = null; String serde = null; Map mapProp = null; + if ("SequenceFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { + inputFormat = SEQUENCEFILE_INPUT; + outputFormat = SEQUENCEFILE_OUTPUT; + } + LOG.info("Creating table" + tableName); int numCh = ast.getChildCount(); for (int num = 1; num < numCh; num++) @@ -195,10 +204,16 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { } break; case HiveParser.TOK_TBLSEQUENCEFILE: - isSequenceFile = true; + inputFormat = SEQUENCEFILE_INPUT; + outputFormat = SEQUENCEFILE_OUTPUT; break; case HiveParser.TOK_TBLTEXTFILE: - isSequenceFile = false; + inputFormat = TEXTFILE_INPUT; + outputFormat = TEXTFILE_OUTPUT; + break; + case HiveParser.TOK_TABLEFILEFORMAT: + inputFormat = unescapeSQLString(child.getChild(0).getText()); + outputFormat = unescapeSQLString(child.getChild(1).getText()); break; case HiveParser.TOK_TABLELOCATION: location = unescapeSQLString(child.getChild(0).getText()); @@ -211,7 +226,7 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer { new createTableDesc(tableName, isExt, cols, partCols, bucketCols, sortCols, numBuckets, fieldDelim, collItemDelim, mapKeyDelim, lineDelim, - comment, isSequenceFile, location, serde, mapProp); + comment, inputFormat, outputFormat, location, serde, mapProp); validateCreateTable(crtTblDesc); rootTasks.add(TaskFactory.get(new DDLWork(crtTblDesc), conf)); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g index efa2ae1..256bf45 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g @@ -104,6 +104,7 @@ TOK_TABLEROWFORMATMAPKEYS; TOK_TABLEROWFORMATLINES; TOK_TBLSEQUENCEFILE; TOK_TBLTEXTFILE; +TOK_TABLEFILEFORMAT; TOK_TABCOLNAME; TOK_TABLELOCATION; TOK_TABLESAMPLE; @@ -305,6 +306,8 @@ tableFileFormat : KW_STORED KW_AS KW_SEQUENCEFILE -> TOK_TBLSEQUENCEFILE | KW_STORED KW_AS KW_TEXTFILE -> TOK_TBLTEXTFILE + | KW_STORED KW_AS KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral + -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt) ; tableLocation @@ -900,6 +903,8 @@ KW_LINES: 'LINES'; KW_STORED: 'STORED'; KW_SEQUENCEFILE: 'SEQUENCEFILE'; KW_TEXTFILE: 'TEXTFILE'; +KW_INPUTFORMAT: 'INPUTFORMAT'; +KW_OUTPUTFORMAT: 'OUTPUTFORMAT'; KW_LOCATION: 'LOCATION'; KW_TABLESAMPLE: 'TABLESAMPLE'; KW_BUCKET: 'BUCKET'; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java index 3686973..f5eaf75 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/createTableDesc.java @@ -42,7 +42,8 @@ public class createTableDesc extends ddlDesc implements Serializable String mapKeyDelim; String lineDelim; String comment; - boolean isSequenceFile; + String inputFormat; + String outputFormat; String location; String serName; Map mapProp; @@ -53,7 +54,7 @@ public class createTableDesc extends ddlDesc implements Serializable int numBuckets, String fieldDelim, String collItemDelim, String mapKeyDelim, String lineDelim, - String comment, boolean isSequenceFile, + String comment, String inputFormat, String outputFormat, String location, String serName, Map mapProp) { this.tableName = tableName; this.isExternal = isExternal; @@ -63,7 +64,8 @@ public class createTableDesc extends ddlDesc implements Serializable this.cols = cols; this.comment = comment; this.fieldDelim = fieldDelim; - this.isSequenceFile = isSequenceFile; + this.inputFormat = inputFormat; + this.outputFormat = outputFormat; this.lineDelim = lineDelim; this.location = location; this.mapKeyDelim = mapKeyDelim; @@ -171,13 +173,22 @@ public class createTableDesc extends ddlDesc implements Serializable this.comment = comment; } - @explain(displayName="isSequenceFile") - public boolean isSequenceFile() { - return isSequenceFile; + @explain(displayName="input format") + public String getInputFormat() { + return inputFormat; } - public void setSequenceFile(boolean isSequenceFile) { - this.isSequenceFile = isSequenceFile; + public void setInputFormat(String inputFormat) { + this.inputFormat = inputFormat; + } + + @explain(displayName="output format") + public String getOutputFormat() { + return outputFormat; + } + + public void setOutputFormat(String outputFormat) { + this.outputFormat = outputFormat; } @explain(displayName="location") diff --git ql/src/test/queries/clientnegative/fileformat_bad_class.q ql/src/test/queries/clientnegative/fileformat_bad_class.q new file mode 100644 index 0000000..33dd4fa --- /dev/null +++ ql/src/test/queries/clientnegative/fileformat_bad_class.q @@ -0,0 +1,3 @@ +CREATE TABLE dest1(key INT, value STRING) STORED AS + INPUTFORMAT 'ClassDoesNotExist' + OUTPUTFORMAT 'java.lang.Void'; diff --git ql/src/test/queries/clientnegative/fileformat_void.q ql/src/test/queries/clientnegative/fileformat_void.q new file mode 100644 index 0000000..bb369e1 --- /dev/null +++ ql/src/test/queries/clientnegative/fileformat_void.q @@ -0,0 +1,8 @@ +CREATE TABLE dest1(key INT, value STRING) STORED AS + INPUTFORMAT 'java.lang.Void' + OUTPUTFORMAT 'java.lang.Void'; + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10; + +DROP TABLE dest1; diff --git ql/src/test/queries/clientpositive/fileformat_sequencefile.q ql/src/test/queries/clientpositive/fileformat_sequencefile.q new file mode 100644 index 0000000..6ef0a7a --- /dev/null +++ ql/src/test/queries/clientpositive/fileformat_sequencefile.q @@ -0,0 +1,17 @@ +EXPLAIN +CREATE TABLE dest1(key INT, value STRING) STORED AS + INPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat' + OUTPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileOutputFormat'; + +CREATE TABLE dest1(key INT, value STRING) STORED AS + INPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat' + OUTPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileOutputFormat'; + +DESCRIBE EXTENDED dest1; + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10; + +SELECT dest1.* FROM dest1; + +DROP TABLE dest1; diff --git ql/src/test/queries/clientpositive/fileformat_text.q ql/src/test/queries/clientpositive/fileformat_text.q new file mode 100644 index 0000000..f538861 --- /dev/null +++ ql/src/test/queries/clientpositive/fileformat_text.q @@ -0,0 +1,17 @@ +EXPLAIN +CREATE TABLE dest1(key INT, value STRING) STORED AS + INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' + OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'; + +CREATE TABLE dest1(key INT, value STRING) STORED AS + INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' + OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'; + +DESCRIBE EXTENDED dest1; + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10; + +SELECT dest1.* FROM dest1; + +DROP TABLE dest1; diff --git ql/src/test/queries/clientpositive/fileformat_void.q ql/src/test/queries/clientpositive/fileformat_void.q new file mode 100644 index 0000000..6e1f6c1 --- /dev/null +++ ql/src/test/queries/clientpositive/fileformat_void.q @@ -0,0 +1,12 @@ +EXPLAIN +CREATE TABLE dest1(key INT, value STRING) STORED AS + INPUTFORMAT 'java.lang.Void' + OUTPUTFORMAT 'java.lang.Void'; + +CREATE TABLE dest1(key INT, value STRING) STORED AS + INPUTFORMAT 'java.lang.Void' + OUTPUTFORMAT 'java.lang.Void'; + +DESCRIBE EXTENDED dest1; + +DROP TABLE dest1; diff --git ql/src/test/results/clientnegative/fileformat_bad_class.q.out ql/src/test/results/clientnegative/fileformat_bad_class.q.out new file mode 100644 index 0000000..c4f09c5 --- /dev/null +++ ql/src/test/results/clientnegative/fileformat_bad_class.q.out @@ -0,0 +1,2 @@ +FAILED: Error in metadata: Class not found: ClassDoesNotExist +FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask diff --git ql/src/test/results/clientnegative/fileformat_void.q.out ql/src/test/results/clientnegative/fileformat_void.q.out new file mode 100644 index 0000000..7736e7e --- /dev/null +++ ql/src/test/results/clientnegative/fileformat_void.q.out @@ -0,0 +1 @@ +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask diff --git ql/src/test/results/clientpositive/fileformat_sequencefile.q.out ql/src/test/results/clientpositive/fileformat_sequencefile.q.out new file mode 100644 index 0000000..d7bc408 --- /dev/null +++ ql/src/test/results/clientpositive/fileformat_sequencefile.q.out @@ -0,0 +1,32 @@ +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE dest1 (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEFILEFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat' 'org.apache.hadoop.mapred.SequenceFileOutputFormat')) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Create Table Operator: + Create Table + columns: key int, value string + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + # buckets: -1 + output format: org.apache.hadoop.mapred.SequenceFileOutputFormat + name: dest1 + isExternal: false + + +key int +value string +Detailed Table Information: +Table(tableName:dest1,dbName:default,owner:dphillips,createTime:1229534167,lastAccessTime:0,retention:0,sd:StorageDescriptor(cols:[FieldSchema(name:key,type:int,comment:null), FieldSchema(name:value,type:string,comment:null)],location:file:/vm/tmp/hive/build/ql/test/data/warehouse/dest1,inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat,outputFormat:org.apache.hadoop.mapred.SequenceFileOutputFormat,compressed:false,numBuckets:-1,serdeInfo:SerDeInfo(name:null,serializationLib:org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,parameters:{serialization.format=1}),bucketCols:[],sortCols:[],parameters:{}),partitionKeys:[],parameters:{}) +0 val_0 +4 val_4 +8 val_8 +0 val_0 +0 val_0 +5 val_5 +5 val_5 +2 val_2 +5 val_5 +9 val_9 diff --git ql/src/test/results/clientpositive/fileformat_text.q.out ql/src/test/results/clientpositive/fileformat_text.q.out new file mode 100644 index 0000000..0446a6b --- /dev/null +++ ql/src/test/results/clientpositive/fileformat_text.q.out @@ -0,0 +1,32 @@ +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE dest1 (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEFILEFORMAT 'org.apache.hadoop.mapred.TextInputFormat' 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat')) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Create Table Operator: + Create Table + columns: key int, value string + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: dest1 + isExternal: false + + +key int +value string +Detailed Table Information: +Table(tableName:dest1,dbName:default,owner:dphillips,createTime:1229534038,lastAccessTime:0,retention:0,sd:StorageDescriptor(cols:[FieldSchema(name:key,type:int,comment:null), FieldSchema(name:value,type:string,comment:null)],location:file:/vm/tmp/hive/build/ql/test/data/warehouse/dest1,inputFormat:org.apache.hadoop.mapred.TextInputFormat,outputFormat:org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat,compressed:false,numBuckets:-1,serdeInfo:SerDeInfo(name:null,serializationLib:org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,parameters:{serialization.format=1}),bucketCols:[],sortCols:[],parameters:{}),partitionKeys:[],parameters:{}) +0 val_0 +4 val_4 +8 val_8 +0 val_0 +0 val_0 +5 val_5 +5 val_5 +2 val_2 +5 val_5 +9 val_9 diff --git ql/src/test/results/clientpositive/fileformat_void.q.out ql/src/test/results/clientpositive/fileformat_void.q.out new file mode 100644 index 0000000..625555a --- /dev/null +++ ql/src/test/results/clientpositive/fileformat_void.q.out @@ -0,0 +1,22 @@ +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE dest1 (TOK_TABCOLLIST (TOK_TABCOL key TOK_INT) (TOK_TABCOL value TOK_STRING)) (TOK_TABLEFILEFORMAT 'java.lang.Void' 'java.lang.Void')) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Create Table Operator: + Create Table + columns: key int, value string + input format: java.lang.Void + # buckets: -1 + output format: java.lang.Void + name: dest1 + isExternal: false + + +key int +value string +Detailed Table Information: +Table(tableName:dest1,dbName:default,owner:dphillips,createTime:1229530913,lastAccessTime:0,retention:0,sd:StorageDescriptor(cols:[FieldSchema(name:key,type:int,comment:null), FieldSchema(name:value,type:string,comment:null)],location:file:/vm/tmp/hive/build/ql/test/data/warehouse/dest1,inputFormat:java.lang.Void,outputFormat:java.lang.Void,compressed:false,numBuckets:-1,serdeInfo:SerDeInfo(name:null,serializationLib:org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe,parameters:{serialization.format=1}),bucketCols:[],sortCols:[],parameters:{}),partitionKeys:[],parameters:{}) diff --git ql/src/test/results/clientpositive/input15.q.out ql/src/test/results/clientpositive/input15.q.out index 3fab5a5..07fbf14 100644 --- ql/src/test/results/clientpositive/input15.q.out +++ ql/src/test/results/clientpositive/input15.q.out @@ -10,10 +10,11 @@ STAGE PLANS: Create Table columns: key int, value string field delimiter: + input format: org.apache.hadoop.mapred.TextInputFormat # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: TEST15 isExternal: false - isSequenceFile: false key int diff --git ql/src/test/results/clientpositive/inputddl1.q.out ql/src/test/results/clientpositive/inputddl1.q.out index 59839ac..bb19df2 100644 --- ql/src/test/results/clientpositive/inputddl1.q.out +++ ql/src/test/results/clientpositive/inputddl1.q.out @@ -9,9 +9,10 @@ STAGE PLANS: Create Table Operator: Create Table columns: key int, value string + input format: org.apache.hadoop.mapred.TextInputFormat # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: INPUTDDL1 isExternal: false - isSequenceFile: false diff --git ql/src/test/results/clientpositive/inputddl2.q.out ql/src/test/results/clientpositive/inputddl2.q.out index bec281f..e0c5991 100644 --- ql/src/test/results/clientpositive/inputddl2.q.out +++ ql/src/test/results/clientpositive/inputddl2.q.out @@ -9,11 +9,12 @@ STAGE PLANS: Create Table Operator: Create Table columns: key int, value string + input format: org.apache.hadoop.mapred.TextInputFormat # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat partition columns: ds datetime, country string name: INPUTDDL2 isExternal: false - isSequenceFile: false key int diff --git ql/src/test/results/clientpositive/inputddl3.q.out ql/src/test/results/clientpositive/inputddl3.q.out index c562c3a..c6941bf 100644 --- ql/src/test/results/clientpositive/inputddl3.q.out +++ ql/src/test/results/clientpositive/inputddl3.q.out @@ -10,10 +10,11 @@ STAGE PLANS: Create Table columns: key int, value string field delimiter: + input format: org.apache.hadoop.mapred.TextInputFormat # buckets: -1 + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat name: INPUTDDL3 isExternal: false - isSequenceFile: false key int