diff --git ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java index d16b082..113c35e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java @@ -484,8 +484,14 @@ public class RCFile { @Override public void write(DataOutput out) throws IOException { - for (NonSyncDataOutputBuffer currentBuf : loadedColumnsValueBuffer) { - out.write(currentBuf.getData(), 0, currentBuf.getLength()); + if (codec != null) { + for (NonSyncDataOutputBuffer currentBuf : compressedColumnsValueBuffer) { + out.write(currentBuf.getData(), 0, currentBuf.getLength()); + } + } else { + for (NonSyncDataOutputBuffer currentBuf : loadedColumnsValueBuffer) { + out.write(currentBuf.getData(), 0, currentBuf.getLength()); + } } } diff --git ql/src/test/queries/clientpositive/create_merge_compressed.q ql/src/test/queries/clientpositive/create_merge_compressed.q new file mode 100644 index 0000000..121d4a2 --- /dev/null +++ ql/src/test/queries/clientpositive/create_merge_compressed.q @@ -0,0 +1,33 @@ +create table src_rc_merge_test(key int, value string) stored as rcfile; + +load data local inpath '../data/files/smbbucket_1.rc' into table src_rc_merge_test; + +show table extended like `src_rc_merge_test`; + + +set hive.exec.compress.output = true; +set mapred.output.compression.codec=org.apache.hadoop.io.compress.BZip2Codec; + +create table tgt_rc_merge_test stored as rcfile as select * from src_rc_merge_test; + +show table extended like `tgt_rc_merge_test`; + +select count(1) from tgt_rc_merge_test; +select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test; + +insert into table tgt_rc_merge_test select * from src_rc_merge_test; + +show table extended like `tgt_rc_merge_test`; + +select count(1) from tgt_rc_merge_test; +select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test; + +alter table tgt_rc_merge_test concatenate; + +show table extended like `tgt_rc_merge_test`; + +select count(1) from tgt_rc_merge_test; +select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test; + +drop table src_rc_merge_test; +drop table tgt_rc_merge_test; \ No newline at end of file diff --git ql/src/test/results/clientpositive/create_merge_compressed.q.out ql/src/test/results/clientpositive/create_merge_compressed.q.out new file mode 100644 index 0000000..3edfcc9 --- /dev/null +++ ql/src/test/results/clientpositive/create_merge_compressed.q.out @@ -0,0 +1,200 @@ +PREHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@src_rc_merge_test +PREHOOK: query: load data local inpath '../data/files/smbbucket_1.rc' into table src_rc_merge_test +PREHOOK: type: LOAD +PREHOOK: Output: default@src_rc_merge_test +POSTHOOK: query: load data local inpath '../data/files/smbbucket_1.rc' into table src_rc_merge_test +POSTHOOK: type: LOAD +POSTHOOK: Output: default@src_rc_merge_test +PREHOOK: query: show table extended like `src_rc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `src_rc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS +tableName:src_rc_merge_test +owner:krishnak +location:pfile:/Users/krishnak/Projects/hdp/sources/hive-git-apache/build/ql/test/data/warehouse/src_rc_merge_test +inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat +outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:208 +maxFileSize:208 +minFileSize:208 +lastAccessTime:0 +lastUpdateTime:1314611766000 + +PREHOOK: query: create table tgt_rc_merge_test stored as rcfile as select * from src_rc_merge_test +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src_rc_merge_test +POSTHOOK: query: create table tgt_rc_merge_test stored as rcfile as select * from src_rc_merge_test +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +PREHOOK: query: show table extended like `tgt_rc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `tgt_rc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS +tableName:tgt_rc_merge_test +owner:krishnak +location:pfile:/Users/krishnak/Projects/hdp/sources/hive-git-apache/build/ql/test/data/warehouse/tgt_rc_merge_test +inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat +outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:351 +maxFileSize:351 +minFileSize:351 +lastAccessTime:0 +lastUpdateTime:1314611772000 + +PREHOOK: query: select count(1) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-56-14_177_7432749281963663226/-mr-10000 +POSTHOOK: query: select count(1) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-56-14_177_7432749281963663226/-mr-10000 +5 +PREHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-56-33_975_508970466936029578/-mr-10000 +POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-56-33_975_508970466936029578/-mr-10000 +23 -375947694 +PREHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@src_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: show table extended like `tgt_rc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `tgt_rc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +tableName:tgt_rc_merge_test +owner:krishnak +location:pfile:/Users/krishnak/Projects/hdp/sources/hive-git-apache/build/ql/test/data/warehouse/tgt_rc_merge_test +inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat +outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:2 +totalFileSize:702 +maxFileSize:351 +minFileSize:351 +lastAccessTime:0 +lastUpdateTime:1314611822000 + +PREHOOK: query: select count(1) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-57-03_935_6606296649888605515/-mr-10000 +POSTHOOK: query: select count(1) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-57-03_935_6606296649888605515/-mr-10000 +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +10 +PREHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-57-15_012_5025723838177979680/-mr-10000 +POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-57-15_012_5025723838177979680/-mr-10000 +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +46 -751895388 +PREHOOK: query: alter table tgt_rc_merge_test concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: alter table tgt_rc_merge_test concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: show table extended like `tgt_rc_merge_test` +PREHOOK: type: SHOW_TABLESTATUS +POSTHOOK: query: show table extended like `tgt_rc_merge_test` +POSTHOOK: type: SHOW_TABLESTATUS +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +tableName:tgt_rc_merge_test +owner:krishnak +location:pfile:/Users/krishnak/Projects/hdp/sources/hive-git-apache/build/ql/test/data/warehouse/tgt_rc_merge_test +inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat +outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat +columns:struct columns { i32 key, string value} +partitioned:false +partitionColumns: +totalNumberFiles:1 +totalFileSize:510 +maxFileSize:510 +minFileSize:510 +lastAccessTime:0 +lastUpdateTime:1314611850000 + +PREHOOK: query: select count(1) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-57-32_567_3207952970238755873/-mr-10000 +POSTHOOK: query: select count(1) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-57-32_567_3207952970238755873/-mr-10000 +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +10 +PREHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +PREHOOK: type: QUERY +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-57-41_492_2355559229782773078/-mr-10000 +POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-29_02-57-41_492_2355559229782773078/-mr-10000 +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +46 -751895388 +PREHOOK: query: drop table src_rc_merge_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_rc_merge_test +PREHOOK: Output: default@src_rc_merge_test +POSTHOOK: query: drop table src_rc_merge_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_rc_merge_test +POSTHOOK: Output: default@src_rc_merge_test +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: drop table tgt_rc_merge_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tgt_rc_merge_test +PREHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: query: drop table tgt_rc_merge_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tgt_rc_merge_test +POSTHOOK: Output: default@tgt_rc_merge_test +POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]