Index: ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (revision 1129953) +++ ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (working copy) @@ -224,11 +224,13 @@ private static class CombinePathInputFormat { private final List<Operator<? extends Serializable>> opList; private final String inputFormatClassName; + private final String inputSerde; public CombinePathInputFormat(List<Operator<? extends Serializable>> opList, - String inputFormatClassName) { + String inputFormatClassName, String serde) { this.opList = opList; this.inputFormatClassName = inputFormatClassName; + this.inputSerde = serde; } @Override @@ -239,7 +241,8 @@ return false; } return opList.equals(mObj.opList) && - inputFormatClassName.equals(mObj.inputFormatClassName); + inputFormatClassName.equals(mObj.inputFormatClassName) && + this.inputSerde.equals(mObj.inputSerde); } return false; } @@ -290,6 +293,7 @@ // Use HiveInputFormat if any of the paths is not splittable Class inputFormatClass = part.getInputFileFormatClass(); String inputFormatClassName = inputFormatClass.getName(); + String serdeClsName = part.getSerdeClassName(); InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not, @@ -352,7 +356,7 @@ if (!mrwork.isMapperCannotSpanPartns()) { opList = HiveFileFormatUtils.doGetWorksFromPath( pathToAliases, aliasToWork, filterPath); - f = poolMap.get(new CombinePathInputFormat(opList, inputFormatClassName)); + f = poolMap.get(new CombinePathInputFormat(opList, inputFormatClassName, serdeClsName)); } else { if (poolSet.contains(filterPath)) { @@ -370,7 +374,7 @@ "; using filter path " + filterPath); combine.createPool(job, f); if (!mrwork.isMapperCannotSpanPartns()) { - poolMap.put(new 
CombinePathInputFormat(opList, inputFormatClassName, serdeClsName), f); } } else { LOG.info("CombineHiveInputSplit: pool is already created for " + path + Index: ql/src/test/queries/clientpositive/combine_check_serde.q =================================================================== --- ql/src/test/queries/clientpositive/combine_check_serde.q (revision 0) +++ ql/src/test/queries/clientpositive/combine_check_serde.q (revision 0) @@ -0,0 +1,15 @@ +set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; +set mapred.min.split.size=256; +set mapred.min.split.size.per.node=256; +set mapred.min.split.size.per.rack=256; +set mapred.max.split.size=256; + +create table combine_check_serde (key string, value string) partitioned by (ds string) stored as sequencefile; + +insert overwrite table combine_check_serde partition (ds='2010') select key, value from src; +desc extended combine_check_serde partition (ds='2010'); +alter table combine_check_serde set serde "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe"; +insert overwrite table combine_check_serde partition (ds='2011') select key, value from src; +desc extended combine_check_serde partition (ds='2011'); + +select key,value from combine_check_serde where ds>='2010' and key <100 order by key; Index: ql/src/test/results/clientpositive/combine_check_serde.q.out =================================================================== --- ql/src/test/results/clientpositive/combine_check_serde.q.out (revision 0) +++ ql/src/test/results/clientpositive/combine_check_serde.q.out (revision 0) @@ -0,0 +1,243 @@ +PREHOOK: query: create table combine_check_serde (key string, value string) partitioned by (ds string) stored as sequencefile +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table combine_check_serde (key string, value string) partitioned by (ds string) stored as sequencefile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@combine_check_serde +PREHOOK: query: insert overwrite table 
combine_check_serde partition (ds='2010') select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@combine_check_serde@ds=2010 +POSTHOOK: query: insert overwrite table combine_check_serde partition (ds='2010') select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@combine_check_serde@ds=2010 +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc extended combine_check_serde partition (ds='2010') +PREHOOK: type: DESCTABLE +POSTHOOK: query: desc extended combine_check_serde partition (ds='2010') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +key string +value string +ds string + +Detailed Partition Information Partition(values:[2010], dbName:default, tableName:combine_check_serde, createTime:1306894311, lastAccessTime:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null)], location:pfile:/Users/heyongqiang/Documents/workspace/Hive-3/build/ql/test/data/warehouse/combine_check_serde/ds=2010, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), parameters:{numFiles=1, transient_lastDdlTime=1306894311, numRows=500, 
totalSize=11999}) +PREHOOK: query: alter table combine_check_serde set serde "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe" +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@combine_check_serde +PREHOOK: Output: default@combine_check_serde +POSTHOOK: query: alter table combine_check_serde set serde "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe" +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@combine_check_serde +POSTHOOK: Output: default@combine_check_serde +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table combine_check_serde partition (ds='2011') select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@combine_check_serde@ds=2011 +POSTHOOK: query: insert overwrite table combine_check_serde partition (ds='2011') select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@combine_check_serde@ds=2011 +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2011).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc extended combine_check_serde partition (ds='2011') +PREHOOK: type: DESCTABLE +POSTHOOK: query: desc extended combine_check_serde partition (ds='2011') +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: 
combine_check_serde PARTITION(ds=2010).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2011).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +key string from deserializer +value string from deserializer +ds string + +Detailed Partition Information Partition(values:[2011], dbName:default, tableName:combine_check_serde, createTime:1306894318, lastAccessTime:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:from deserializer), FieldSchema(name:value, type:string, comment:from deserializer)], location:pfile:/Users/heyongqiang/Documents/workspace/Hive-3/build/ql/test/data/warehouse/combine_check_serde/ds=2011, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), parameters:{numFiles=1, transient_lastDdlTime=1306894318, numRows=500, totalSize=11999}) +PREHOOK: query: select key,value from combine_check_serde where ds>='2010' and key <100 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@combine_check_serde@ds=2010 +PREHOOK: Input: default@combine_check_serde@ds=2011 +PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-05-31_19-11-58_813_7619906868057515454/-mr-10000 +POSTHOOK: query: select key,value from combine_check_serde where ds>='2010' and key <100 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@combine_check_serde@ds=2010 +POSTHOOK: Input: default@combine_check_serde@ds=2011 +POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2011-05-31_19-11-58_813_7619906868057515454/-mr-10000 +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2010).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2011).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_check_serde PARTITION(ds=2011).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +10 val_10 +11 val_11 +11 val_11 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +17 val_17 +17 val_17 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +19 val_19 +19 val_19 +2 val_2 +2 val_2 +20 val_20 +20 val_20 +24 val_24 +24 val_24 +24 val_24 +24 val_24 +26 val_26 +26 val_26 +26 val_26 +26 val_26 +27 val_27 +27 val_27 +28 val_28 +28 val_28 +30 val_30 +30 val_30 +33 val_33 +33 val_33 +34 val_34 +34 val_34 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +35 val_35 +37 val_37 +37 val_37 +37 val_37 +37 val_37 +4 val_4 +4 val_4 +41 val_41 +41 val_41 +42 val_42 +42 val_42 +42 val_42 +42 val_42 +43 val_43 +43 val_43 +44 val_44 +44 val_44 +47 val_47 +47 val_47 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +51 val_51 +51 val_51 +51 val_51 +51 val_51 +53 val_53 +53 val_53 +54 val_54 +54 val_54 +57 val_57 +57 val_57 +58 val_58 +58 val_58 +58 val_58 +58 val_58 +64 val_64 +64 val_64 +65 val_65 +65 val_65 +66 val_66 +66 val_66 +67 val_67 +67 val_67 +67 val_67 +67 val_67 +69 val_69 +69 val_69 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +70 val_70 +72 val_72 +72 val_72 +72 val_72 +72 
val_72 +74 val_74 +74 val_74 +76 val_76 +76 val_76 +76 val_76 +76 val_76 +77 val_77 +77 val_77 +78 val_78 +78 val_78 +8 val_8 +8 val_8 +80 val_80 +80 val_80 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +9 val_9 +9 val_9 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +98 val_98 +98 val_98