diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 7e5a515..249a6d8 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -766,6 +766,10 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile", "ORC"), "Default file format for CREATE TABLE statement. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]"), + HIVEDEFAULTMANAGEDFILEFORMAT("hive.default.fileformat.managed", "none", + new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC"), + "Default file format for CREATE TABLE statement applied to managed tables only. External tables will be \n" + + "created with default file format. Leaving this set to none will result in using the default file format for all tables."), HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile"), "Default file format for storing result of the query."), HIVECHECKFILEFORMAT("hive.fileformat.check", true, "Whether to check file format or not when loading data files"), diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index dba59c7..b9e15a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1093,7 +1093,7 @@ private void analyzeCreateIndex(ASTNode ast) throws SemanticException { } } - storageFormat.fillDefaultStorageFormat(); + storageFormat.fillDefaultStorageFormat(false); if (indexTableName == null) { indexTableName = MetaStoreUtils.getIndexTableName(qTabName[0], qTabName[1], indexName); indexTableName = qTabName[0] + "." 
+ indexTableName; // on same database with base table diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index bc027d4..c4b5458 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -10749,7 +10749,7 @@ ASTNode analyzeCreateTable( } } - storageFormat.fillDefaultStorageFormat(); + storageFormat.fillDefaultStorageFormat(isExt); if ((command_type == CTAS) && (storageFormat.getStorageHandler() != null)) { throw new SemanticException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg()); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java index 7723430..48aca4d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java @@ -104,9 +104,15 @@ protected void processStorageFormat(String name) throws SemanticException { } } - protected void fillDefaultStorageFormat() throws SemanticException { + protected void fillDefaultStorageFormat(boolean isExternal) throws SemanticException { if ((inputFormat == null) && (storageHandler == null)) { String defaultFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT); + String defaultManagedFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEDEFAULTMANAGEDFILEFORMAT); + + if (!isExternal && !"none".equalsIgnoreCase(defaultManagedFormat)) { + defaultFormat = defaultManagedFormat; + } + if (StringUtils.isBlank(defaultFormat)) { inputFormat = IOConstants.TEXTFILE_INPUT; outputFormat = IOConstants.TEXTFILE_OUTPUT; diff --git ql/src/test/queries/clientpositive/default_file_format.q ql/src/test/queries/clientpositive/default_file_format.q new file mode 100644 index 0000000..51481f4 --- /dev/null +++ ql/src/test/queries/clientpositive/default_file_format.q @@ -0,0 +1,28 @@ +create table t (c int); + 
+set hive.default.fileformat.managed=orc; + +create table o (c int); + +create external table e (c int) location '/some/path'; + +create table i (c int) location '/some/path'; + +set hive.default.fileformat=orc; + +create table io (c int); + +describe formatted t; +describe formatted o; +describe formatted io; +describe formatted e; +describe formatted i; + +drop table t; +drop table o; +drop table io; +drop table e; +drop table i; + +set hive.default.fileformat=TextFile; +set hive.default.fileformat.managed=none; diff --git ql/src/test/results/clientpositive/default_file_format.q.out ql/src/test/results/clientpositive/default_file_format.q.out new file mode 100644 index 0000000..c5d0087 --- /dev/null +++ ql/src/test/results/clientpositive/default_file_format.q.out @@ -0,0 +1,240 @@ +PREHOOK: query: create table t (c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: create table o (c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@o +POSTHOOK: query: create table o (c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@o +PREHOOK: query: create external table e (c int) location '/some/path' +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@e +POSTHOOK: query: create external table e (c int) location '/some/path' +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@e +PREHOOK: query: create table i (c int) location '/some/path' +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i (c int) location '/some/path' +POSTHOOK: type: 
CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: create table io (c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@io +POSTHOOK: query: create table io (c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@io +PREHOOK: query: describe formatted t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t +POSTHOOK: query: describe formatted t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted o +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@o +POSTHOOK: query: describe formatted o +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@o +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: 
describe formatted io +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@io +POSTHOOK: query: describe formatted io +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@io +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted e +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@e +POSTHOOK: query: describe formatted e +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@e +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + EXTERNAL TRUE +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted i +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: describe formatted i +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type comment + +c int + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Protect Mode: None +Retention: 0 +#### A masked pattern was here #### +Table Type: 
MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE false + numFiles 0 + numRows -1 + rawDataSize -1 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table t +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: drop table t +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: drop table o +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@o +PREHOOK: Output: default@o +POSTHOOK: query: drop table o +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@o +POSTHOOK: Output: default@o +PREHOOK: query: drop table io +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@io +PREHOOK: Output: default@io +POSTHOOK: query: drop table io +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@io +POSTHOOK: Output: default@io +PREHOOK: query: drop table e +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@e +PREHOOK: Output: default@e +POSTHOOK: query: drop table e +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@e +POSTHOOK: Output: default@e +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i