diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 28d8f52..8e089af 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -436,6 +436,10 @@
HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "TextFile"),
HIVECHECKFILEFORMAT("hive.fileformat.check", true),
+ // Default SerDe Hive uses for tables stored as RCFile
+ HIVEDEFAULTRCFILESERDE("hive.default.rcfile.serde",
+ "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe"),
+
//Location of Hive run time structured log file
HIVEHISTORYFILELOC("hive.querylog.location", "/tmp/" + System.getProperty("user.name")),
diff --git conf/hive-default.xml.template conf/hive-default.xml.template
index be9edc3..99861ba 100644
--- conf/hive-default.xml.template
+++ conf/hive-default.xml.template
@@ -374,6 +374,12 @@
+ hive.default.rcfile.serde
+ org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
+ The default SerDe Hive will use for the RCFile format
+
+
+
hive.fileformat.check
true
Whether to check file format or not when loading data files
diff --git data/conf/hive-site.xml data/conf/hive-site.xml
index 544ba35..4e6ff16 100644
--- data/conf/hive-site.xml
+++ data/conf/hive-site.xml
@@ -188,4 +188,10 @@
The default input format, if it is not specified, the system assigns it. It is set to HiveInputFormat for hadoop versions 17, 18 and 19, whereas it is set to CombineHiveInputFormat for hadoop 20. The user can always overwrite it - if there is a bug in CombineHiveInputFormat, it can always be manually set to HiveInputFormat.
+
+ hive.default.rcfile.serde
+ org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ The default SerDe Hive will use for the RCFile format
+
+
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index e5321c8..ff96d39 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -64,7 +64,6 @@
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
@@ -121,7 +120,6 @@
.getName();
protected static final String ORCFILE_SERDE = OrcSerde.class
.getName();
- protected static final String COLUMNAR_SERDE = ColumnarSerDe.class.getName();
class RowFormatParams {
String fieldDelim = null;
@@ -195,7 +193,7 @@ protected boolean fillStorageFormat(ASTNode child, AnalyzeCreateCommonVars share
inputFormat = RCFILE_INPUT;
outputFormat = RCFILE_OUTPUT;
if (shared.serde == null) {
- shared.serde = COLUMNAR_SERDE;
+ shared.serde = conf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE);
}
storageFormat = true;
break;
@@ -231,7 +229,7 @@ protected void fillDefaultStorageFormat(AnalyzeCreateCommonVars shared) {
} else if ("RCFile".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) {
inputFormat = RCFILE_INPUT;
outputFormat = RCFILE_OUTPUT;
- shared.serde = COLUMNAR_SERDE;
+ shared.serde = conf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE);
} else if ("ORC".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) {
inputFormat = ORCFILE_INPUT;
outputFormat = ORCFILE_OUTPUT;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 3fb9400..b2da758 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -1350,7 +1350,7 @@ private void analyzeAlterTableFileFormat(ASTNode ast, String tableName,
case HiveParser.TOK_TBLRCFILE:
inputFormat = RCFILE_INPUT;
outputFormat = RCFILE_OUTPUT;
- serde = COLUMNAR_SERDE;
+ serde = conf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE);
break;
case HiveParser.TOK_TBLORCFILE:
inputFormat = ORCFILE_INPUT;
diff --git ql/src/test/queries/clientpositive/rcfile_default_format.q ql/src/test/queries/clientpositive/rcfile_default_format.q
index 1106e4c..e61292e 100644
--- ql/src/test/queries/clientpositive/rcfile_default_format.q
+++ ql/src/test/queries/clientpositive/rcfile_default_format.q
@@ -1,19 +1,31 @@
SET hive.default.fileformat = RCFile;
CREATE TABLE rcfile_default_format (key STRING);
-DESCRIBE EXTENDED rcfile_default_format;
+DESCRIBE FORMATTED rcfile_default_format;
CREATE TABLE rcfile_default_format_ctas AS SELECT key,value FROM src;
-DESCRIBE EXTENDED rcfile_default_format_ctas;
+DESCRIBE FORMATTED rcfile_default_format_ctas;
CREATE TABLE rcfile_default_format_txtfile (key STRING) STORED AS TEXTFILE;
INSERT OVERWRITE TABLE rcfile_default_format_txtfile SELECT key from src;
-DESCRIBE EXTENDED rcfile_default_format_txtfile;
+DESCRIBE FORMATTED rcfile_default_format_txtfile;
SET hive.default.fileformat = TextFile;
CREATE TABLE textfile_default_format_ctas AS SELECT key,value FROM rcfile_default_format_ctas;
-DESCRIBE EXTENDED textfile_default_format_ctas;
+DESCRIBE FORMATTED textfile_default_format_ctas;
+SET hive.default.fileformat = RCFile;
+SET hive.default.rcfile.serde = org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;
+CREATE TABLE rcfile_default_format_ctas_default_serde AS SELECT key,value FROM rcfile_default_format_ctas;
+DESCRIBE FORMATTED rcfile_default_format_ctas_default_serde;
+
+CREATE TABLE rcfile_default_format_default_serde (key STRING);
+DESCRIBE FORMATTED rcfile_default_format_default_serde;
+SET hive.default.fileformat = TextFile;
+CREATE TABLE rcfile_ctas_default_serde STORED AS rcfile AS SELECT key,value FROM rcfile_default_format_ctas;
+DESCRIBE FORMATTED rcfile_ctas_default_serde;
+CREATE TABLE rcfile_default_serde (key STRING) STORED AS rcfile;
+DESCRIBE FORMATTED rcfile_default_serde;
diff --git ql/src/test/results/clientpositive/rcfile_default_format.q.out ql/src/test/results/clientpositive/rcfile_default_format.q.out
index 8c73955..6ad7f0b 100644
--- ql/src/test/results/clientpositive/rcfile_default_format.q.out
+++ ql/src/test/results/clientpositive/rcfile_default_format.q.out
@@ -3,13 +3,34 @@ PREHOOK: type: CREATETABLE
POSTHOOK: query: CREATE TABLE rcfile_default_format (key STRING)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@rcfile_default_format
-PREHOOK: query: DESCRIBE EXTENDED rcfile_default_format
+PREHOOK: query: DESCRIBE FORMATTED rcfile_default_format
PREHOOK: type: DESCTABLE
-POSTHOOK: query: DESCRIBE EXTENDED rcfile_default_format
+POSTHOOK: query: DESCRIBE FORMATTED rcfile_default_format
POSTHOOK: type: DESCTABLE
+# col_name data_type comment
+
key string None
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
PREHOOK: query: CREATE TABLE rcfile_default_format_ctas AS SELECT key,value FROM src
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@src
@@ -17,14 +38,40 @@ POSTHOOK: query: CREATE TABLE rcfile_default_format_ctas AS SELECT key,value FRO
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@src
POSTHOOK: Output: default@rcfile_default_format_ctas
-PREHOOK: query: DESCRIBE EXTENDED rcfile_default_format_ctas
+PREHOOK: query: DESCRIBE FORMATTED rcfile_default_format_ctas
PREHOOK: type: DESCTABLE
-POSTHOOK: query: DESCRIBE EXTENDED rcfile_default_format_ctas
+POSTHOOK: query: DESCRIBE FORMATTED rcfile_default_format_ctas
POSTHOOK: type: DESCTABLE
+# col_name data_type comment
+
key string None
value string None
+# Detailed Table Information
+Database: default
#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ numFiles 1
+ numPartitions 0
+ numRows 500
+ rawDataSize 4812
+ totalSize 5293
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
PREHOOK: query: CREATE TABLE rcfile_default_format_txtfile (key STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
POSTHOOK: query: CREATE TABLE rcfile_default_format_txtfile (key STRING) STORED AS TEXTFILE
@@ -39,14 +86,40 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@rcfile_default_format_txtfile
POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-PREHOOK: query: DESCRIBE EXTENDED rcfile_default_format_txtfile
+PREHOOK: query: DESCRIBE FORMATTED rcfile_default_format_txtfile
PREHOOK: type: DESCTABLE
-POSTHOOK: query: DESCRIBE EXTENDED rcfile_default_format_txtfile
+POSTHOOK: query: DESCRIBE FORMATTED rcfile_default_format_txtfile
POSTHOOK: type: DESCTABLE
POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
key string None
+# Detailed Table Information
+Database: default
#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ numFiles 1
+ numPartitions 0
+ numRows 500
+ rawDataSize 1406
+ totalSize 1906
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
PREHOOK: query: CREATE TABLE textfile_default_format_ctas AS SELECT key,value FROM rcfile_default_format_ctas
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@rcfile_default_format_ctas
@@ -55,12 +128,194 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@rcfile_default_format_ctas
POSTHOOK: Output: default@textfile_default_format_ctas
POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-PREHOOK: query: DESCRIBE EXTENDED textfile_default_format_ctas
+PREHOOK: query: DESCRIBE FORMATTED textfile_default_format_ctas
PREHOOK: type: DESCTABLE
-POSTHOOK: query: DESCRIBE EXTENDED textfile_default_format_ctas
+POSTHOOK: query: DESCRIBE FORMATTED textfile_default_format_ctas
POSTHOOK: type: DESCTABLE
POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
key string None
value string None
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ numFiles 1
+ numPartitions 0
+ numRows 500
+ rawDataSize 5312
+ totalSize 5812
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: CREATE TABLE rcfile_default_format_ctas_default_serde AS SELECT key,value FROM rcfile_default_format_ctas
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@rcfile_default_format_ctas
+POSTHOOK: query: CREATE TABLE rcfile_default_format_ctas_default_serde AS SELECT key,value FROM rcfile_default_format_ctas
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@rcfile_default_format_ctas
+POSTHOOK: Output: default@rcfile_default_format_ctas_default_serde
+POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED rcfile_default_format_ctas_default_serde
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED rcfile_default_format_ctas_default_serde
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string from deserializer
+value string from deserializer
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ numFiles 1
+ numPartitions 0
+ numRows 500
+ rawDataSize 4812
+ totalSize 5293
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: CREATE TABLE rcfile_default_format_default_serde (key STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE rcfile_default_format_default_serde (key STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@rcfile_default_format_default_serde
+POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED rcfile_default_format_default_serde
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED rcfile_default_format_default_serde
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string from deserializer
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: CREATE TABLE rcfile_ctas_default_serde STORED AS rcfile AS SELECT key,value FROM rcfile_default_format_ctas
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@rcfile_default_format_ctas
+POSTHOOK: query: CREATE TABLE rcfile_ctas_default_serde STORED AS rcfile AS SELECT key,value FROM rcfile_default_format_ctas
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@rcfile_default_format_ctas
+POSTHOOK: Output: default@rcfile_ctas_default_serde
+POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED rcfile_ctas_default_serde
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED rcfile_ctas_default_serde
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string from deserializer
+value string from deserializer
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ numFiles 1
+ numPartitions 0
+ numRows 500
+ rawDataSize 4812
+ totalSize 5293
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: CREATE TABLE rcfile_default_serde (key STRING) STORED AS rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE rcfile_default_serde (key STRING) STORED AS rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@rcfile_default_serde
+POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DESCRIBE FORMATTED rcfile_default_serde
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED rcfile_default_serde
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: rcfile_default_format_txtfile.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+# col_name data_type comment
+
+key string from deserializer
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Protect Mode: None
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
+InputFormat: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1