Index: ql/src/test/results/clientpositive/insert_compressed.q.out =================================================================== --- ql/src/test/results/clientpositive/insert_compressed.q.out (revision 0) +++ ql/src/test/results/clientpositive/insert_compressed.q.out (revision 0) @@ -0,0 +1,98 @@ +PREHOOK: query: drop table insert_compressed +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table insert_compressed +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table insert_compressed (key int, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table insert_compressed (key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@insert_compressed +PREHOOK: query: insert overwrite table insert_compressed select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@insert_compressed +POSTHOOK: query: insert overwrite table insert_compressed select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@insert_compressed +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from insert_compressed +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_compressed +PREHOOK: Output: file:/tmp/franklin/hive_2011-07-20_17-04-35_230_3783253000190717636/-mr-10000 +POSTHOOK: query: select count(*) from insert_compressed +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_compressed +POSTHOOK: Output: file:/tmp/franklin/hive_2011-07-20_17-04-35_230_3783253000190717636/-mr-10000 +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +500 +PREHOOK: query: insert into table insert_compressed select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@insert_compressed +POSTHOOK: query: insert into table insert_compressed select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@insert_compressed +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from insert_compressed +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_compressed +PREHOOK: Output: file:/tmp/franklin/hive_2011-07-20_17-04-42_862_5790583242339882207/-mr-10000 +POSTHOOK: query: select count(*) from insert_compressed +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_compressed +POSTHOOK: Output: file:/tmp/franklin/hive_2011-07-20_17-04-42_862_5790583242339882207/-mr-10000 +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1000 +PREHOOK: query: insert into table insert_compressed select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@insert_compressed +POSTHOOK: query: insert into table insert_compressed select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@insert_compressed +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select count(*) from insert_compressed +PREHOOK: type: QUERY +PREHOOK: Input: default@insert_compressed +PREHOOK: Output: file:/tmp/franklin/hive_2011-07-20_17-04-50_417_6428429184794119745/-mr-10000 +POSTHOOK: query: select count(*) from insert_compressed +POSTHOOK: type: QUERY +POSTHOOK: Input: default@insert_compressed +POSTHOOK: Output: file:/tmp/franklin/hive_2011-07-20_17-04-50_417_6428429184794119745/-mr-10000 +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1500 +PREHOOK: query: drop table insert_compressed +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@insert_compressed +PREHOOK: Output: default@insert_compressed +POSTHOOK: query: drop table insert_compressed +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@insert_compressed +POSTHOOK: Output: default@insert_compressed +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: insert_compressed.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/queries/clientpositive/insert_compressed.q =================================================================== --- ql/src/test/queries/clientpositive/insert_compressed.q (revision 0) +++ ql/src/test/queries/clientpositive/insert_compressed.q (revision 0) @@ -0,0 +1,15 @@ +set hive.exec.compress.output=true; + +drop table insert_compressed; +create table insert_compressed (key int, value string); + +insert overwrite table insert_compressed select * from src; +select count(*) from insert_compressed; + +insert into table insert_compressed select * from src; +select count(*) from insert_compressed; + +insert into table insert_compressed select * from src; +select count(*) from insert_compressed; + +drop table insert_compressed; Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (revision 1148973) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (working copy) @@ -1816,10 +1816,25 @@ // Note: there are race conditions here, but I don't believe // they're worse than what was already present. int counter = 1; + + // Strip off the file type, if any so we don't make: + // 000000_0.gz -> 000000_0.gz_copy_1 + String name = itemStaging.getName(); + String filetype; + int index = name.lastIndexOf('.'); + if (index >= 0) { + filetype = name.substring(index); + name = name.substring(0, index); + } else { + filetype = ""; + } + Path itemDest = new Path(destf, itemStaging.getName()); - + Path itemStagingBase = new Path(itemStaging.getParent(), name); + while (fs.exists(itemDest)) { - Path proposedStaging = itemStaging.suffix("_copy_" + counter++); + Path proposedStaging = itemStagingBase + .suffix("_copy_" + counter++).suffix(filetype); Path proposedDest = new Path(destf, proposedStaging.getName()); if (fs.exists(proposedDest)) {