diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java index 9a21503a1a..bd15fa4a8a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -114,6 +115,7 @@ private FileChecker() { .put(SequenceFileInputFormat.class, SequenceFileInputFormatChecker.class) .put(RCFileInputFormat.class, RCFileInputFormat.class) .put(OrcInputFormat.class, OrcInputFormat.class) + .put(MapredParquetInputFormat.class, MapredParquetInputFormat.class) .build(); textInputFormatCheckerMap = ImmutableMap ., Class>builder() diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java index 10e6a1821c..ed6d577f8d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java @@ -14,14 +14,19 @@ package org.apache.hadoop.hive.ql.io.parquet; import java.io.IOException; +import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.io.DataCache; import org.apache.hadoop.hive.common.io.FileMetadataCache; +import org.apache.hadoop.hive.conf.HiveConf; import 
org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
+import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.LlapCacheOnlyInputFormatInterface;
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetTableUtils;
 import org.apache.hadoop.hive.ql.plan.MapWork;
@@ -49,7 +54,7 @@
  * are not currently supported. Removing the interface turns off vectorization.
  */
 public class MapredParquetInputFormat extends FileInputFormat
-    implements VectorizedInputFormatInterface, LlapCacheOnlyInputFormatInterface {
+    implements InputFormatChecker, VectorizedInputFormatInterface, LlapCacheOnlyInputFormatInterface {
 
   private static final Logger LOG = LoggerFactory.getLogger(MapredParquetInputFormat.class);
 
@@ -96,4 +101,37 @@ public void injectCaches(
       FileMetadataCache metadataCache, DataCache dataCache, Configuration cacheConf) {
     vectorizedSelf.injectCaches(metadataCache, dataCache, cacheConf);
   }
+
+  /**
+   * Checks whether the given files are acceptable input for this format.
+   *
+   * <p>Only file length is examined: the input is rejected when the list is
+   * empty or when any listed file has a length of 0. File contents are not
+   * read, so {@code fs} and {@code conf} are unused here.
+   *
+   * @param fs    file system of the files (unused by this check)
+   * @param conf  Hive configuration (unused by this check)
+   * @param files files to validate
+   * @return {@code true} if there is at least one file and none has length 0;
+   *         {@code false} otherwise
+   * @throws IOException declared in the signature; this body does not throw it
+   *                     explicitly
+   */
+  @Override
+  public boolean validateInput(FileSystem fs, HiveConf conf, List<FileStatus> files)
+      throws IOException {
+    if (files.isEmpty()) {
+      return false;
+    }
+
+    // The simple validity check is to see if the file is of size 0 or not.
+    // Other checks may be added in the future.
+    for (FileStatus file : files) {
+      if (file.getLen() == 0) {
+        return false;
+      }
+    }
+
+    return true;
+  }
 }
diff --git a/ql/src/test/queries/clientnegative/load_data_parquet_empty.q b/ql/src/test/queries/clientnegative/load_data_parquet_empty.q
new file mode 100644
index 0000000000..a0b1a5ef7e
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/load_data_parquet_empty.q
@@ -0,0 +1,4 @@
+CREATE TABLE emptyparquet ( i int) PARTITIONED BY (s string) STORED AS PARQUET;
+
+dfs -touchz ${hiveconf:hive.metastore.warehouse.dir}/parquet_empty;
+LOAD DATA INPATH '${hiveconf:hive.metastore.warehouse.dir}/parquet_empty' INTO TABLE emptyparquet PARTITION (s='something');
diff --git a/ql/src/test/results/clientnegative/load_data_parquet_empty.q.out b/ql/src/test/results/clientnegative/load_data_parquet_empty.q.out
new file mode 100644
index 0000000000..be0f0e689e
--- /dev/null
+++ b/ql/src/test/results/clientnegative/load_data_parquet_empty.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: CREATE TABLE emptyparquet ( i int) PARTITIONED BY (s string) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emptyparquet
+POSTHOOK: query: CREATE TABLE emptyparquet ( i int) PARTITIONED BY (s string) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emptyparquet
+FAILED: SemanticException Unable to load data to destination table. Error: The file that you are trying to load does not match the file format of the destination table.