diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java index 129347b..ebf2298 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java @@ -408,7 +408,7 @@ private AverageSize getAverageSize(FileSystem inpFs, Path dirPath) { */ private long getMergeSize(FileSystem inpFs, Path dirPath, long avgSize) { AverageSize averageSize = getAverageSize(inpFs, dirPath); - if (averageSize.getTotalSize() <= 0) { + if (averageSize.getTotalSize() < 0) { return -1; } diff --git ql/src/test/queries/clientpositive/merge_empty.q ql/src/test/queries/clientpositive/merge_empty.q new file mode 100644 index 0000000..188b39e --- /dev/null +++ ql/src/test/queries/clientpositive/merge_empty.q @@ -0,0 +1,14 @@ +set hive.merge.mapredfiles=true; +set hive.merge.sparkfiles=true; +set hive.auto.convert.join=false; +set mapreduce.job.reduces=1000; + +create table dummy (a string); +insert overwrite directory '/tmp/test' select src.key from src join dummy on src.key = dummy.a; +dfs -ls /tmp/test; + +-- verify that this doesn't merge for bucketed tables +create table foo (a bigint, b string) clustered by (a) into 256 buckets; +create table bar (a bigint, b string); +insert overwrite table foo select * from bar; +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/foo; diff --git ql/src/test/results/clientpositive/merge_empty.q.out ql/src/test/results/clientpositive/merge_empty.q.out new file mode 100644 index 0000000..c13cbf4 --- /dev/null +++ ql/src/test/results/clientpositive/merge_empty.q.out @@ -0,0 +1,45 @@ +PREHOOK: query: create table dummy (a string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dummy +POSTHOOK: query: create table dummy (a string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dummy +#### A masked pattern was here #### +PREHOOK: type: QUERY +PREHOOK: Input: default@dummy +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dummy +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: create table foo (a bigint, b string) clustered by (a) into 256 buckets +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create table foo (a bigint, b string) clustered by (a) into 256 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: create table bar (a bigint, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@bar +POSTHOOK: query: create table bar (a bigint, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@bar +PREHOOK: query: insert overwrite table foo select * from bar +PREHOOK: type: QUERY +PREHOOK: Input: default@bar +PREHOOK: Output: default@foo +POSTHOOK: query: insert overwrite table foo select * from bar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bar +POSTHOOK: Output: default@foo +POSTHOOK: Lineage: foo.a SIMPLE [(bar)bar.FieldSchema(name:a, type:bigint, comment:null), ] +POSTHOOK: Lineage: foo.b SIMPLE [(bar)bar.FieldSchema(name:b, type:string, comment:null), ] +Found 256 items +#### A masked pattern was here ####