diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AbstractStorageFormatDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AbstractStorageFormatDescriptor.java index 854a7a3..53f780b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/AbstractStorageFormatDescriptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AbstractStorageFormatDescriptor.java @@ -19,10 +19,17 @@ package org.apache.hadoop.hive.ql.io; +import java.util.Map; + public abstract class AbstractStorageFormatDescriptor implements StorageFormatDescriptor { @Override public String getSerde() { return null; } + + @Override + public Map<String, String> getDefaultSerdeProps() { + return null; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/ParquetFileStorageFormatDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/io/ParquetFileStorageFormatDescriptor.java index 166775f..81b4ce6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/ParquetFileStorageFormatDescriptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/ParquetFileStorageFormatDescriptor.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.io; +import java.util.HashMap; +import java.util.Map; import java.util.Set; import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; @@ -25,6 +27,9 @@ import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import com.google.common.collect.ImmutableSet; +import org.apache.parquet.hadoop.ParquetOutputFormat; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; + public class ParquetFileStorageFormatDescriptor extends AbstractStorageFormatDescriptor { @Override @@ -43,4 +48,11 @@ public String getOutputFormat() { public String getSerde() { return ParquetHiveSerDe.class.getName(); } + + @Override + public Map<String, String> getDefaultSerdeProps() { + Map<String, String> results = new HashMap<String, String>(); + results.put(ParquetOutputFormat.COMPRESSION, CompressionCodecName.SNAPPY.name()); + return results; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/StorageFormatDescriptor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/io/StorageFormatDescriptor.java index 604abf8..2c8468d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/StorageFormatDescriptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/StorageFormatDescriptor.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.io; +import java.util.Map; import java.util.Set; import javax.annotation.Nullable; @@ -44,5 +45,8 @@ * Return the name of the serde as a string or null */ @Nullable String getSerde(); - + /** + * Return the default SerDe properties to set on tables created with this storage format, or null if there are none. + */ + @Nullable Map<String, String> getDefaultSerdeProps(); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java index 48aca4d..bb6839f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java @@ -24,9 +24,11 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor; import org.apache.hadoop.hive.ql.io.StorageFormatFactory; +import org.apache.hive.common.util.ReflectionUtil; public class StorageFormat { private static final StorageFormatFactory storageFormatFactory = new StorageFormatFactory(); @@ -102,6 +104,11 @@ protected void processStorageFormat(String name) throws SemanticException { serde = ensureClassExists(HiveConf.getVar(conf, HiveConf.ConfVars.HIVEDEFAULTSERDE)); } } + + Map<String, String> defaultSerdeProps = descriptor.getDefaultSerdeProps(); + if (defaultSerdeProps != null) { + serdeProps.putAll(defaultSerdeProps); + } } protected void fillDefaultStorageFormat(boolean isExternal) throws SemanticException { diff --git a/ql/src/test/results/clientpositive/parquet_array_null_element.q.out 
b/ql/src/test/results/clientpositive/parquet_array_null_element.q.out index 387f01e..4cdacc6 100644 --- a/ql/src/test/results/clientpositive/parquet_array_null_element.q.out +++ b/ql/src/test/results/clientpositive/parquet_array_null_element.q.out @@ -81,6 +81,7 @@ Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: + parquet.compression SNAPPY serialization.format 1 PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_array_null_element.txt' OVERWRITE INTO TABLE parquet_array_null_element_staging PREHOOK: type: LOAD diff --git a/ql/src/test/results/clientpositive/parquet_create.q.out b/ql/src/test/results/clientpositive/parquet_create.q.out index c6d33ff..8b8cbcb 100644 --- a/ql/src/test/results/clientpositive/parquet_create.q.out +++ b/ql/src/test/results/clientpositive/parquet_create.q.out @@ -84,6 +84,7 @@ Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: + parquet.compression SNAPPY serialization.format 1 PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_create.txt' OVERWRITE INTO TABLE parquet_create_staging PREHOOK: type: LOAD diff --git a/ql/src/test/results/clientpositive/parquet_partitioned.q.out b/ql/src/test/results/clientpositive/parquet_partitioned.q.out index 3529d70..be8eabc 100644 --- a/ql/src/test/results/clientpositive/parquet_partitioned.q.out +++ b/ql/src/test/results/clientpositive/parquet_partitioned.q.out @@ -78,6 +78,7 @@ Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: + parquet.compression SNAPPY serialization.format 1 PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partitioned.txt' OVERWRITE INTO TABLE parquet_partitioned_staging PREHOOK: type: LOAD