Index: metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
===================================================================
--- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java	(revision 1426147)
+++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java	(working copy)
@@ -1286,7 +1286,7 @@
       return null;
     }
     return new Partition(mpart.getValues(), dbName, tblName, mpart.getCreateTime(),
-        mpart.getLastAccessTime(), convertToStorageDescriptor(mpart.getSd(), true),
+        mpart.getLastAccessTime(), convertToStorageDescriptor(mpart.getSd(), false),
         mpart.getParameters());
   }
@@ -1681,7 +1681,7 @@
     query.setOrdering("partitionName ascending");
     List<MPartition> mparts = (List<MPartition>) query.executeWithMap(params);
-    // pm.retrieveAll(mparts); // retrieveAll is pessimistic. some fields may not be needed
+    pm.retrieveAll(mparts); // retrieveAll is pessimistic. some fields may not be needed
     List<Partition> results = convertToParts(dbName, tblName, mparts);
     // pm.makeTransientAll(mparts); // makeTransient will prohibit future access of unfetched fields
     query.closeAll();
Index: metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
===================================================================
--- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java	(revision 1426147)
+++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java	(working copy)
@@ -237,7 +237,7 @@
     String lib = part.getSd().getSerdeInfo().getSerializationLib();
     try {
       Deserializer deserializer = SerDeUtils.lookupDeserializer(lib);
-      deserializer.initialize(conf, MetaStoreUtils.getSchema(part, table));
+      deserializer.initialize(conf, MetaStoreUtils.getPartitionSchema(part, table));
       return deserializer;
     } catch (RuntimeException e) {
       throw e;
@@ -497,6 +497,13 @@
         .getParameters(), table.getDbName(), table.getTableName(), table.getPartitionKeys());
   }
 
+  public static Properties getPartitionSchema(
+      org.apache.hadoop.hive.metastore.api.Partition partition,
+      org.apache.hadoop.hive.metastore.api.Table table) {
+    return MetaStoreUtils.getSchema(partition.getSd(), partition.getSd(), partition
+        .getParameters(), partition.getDbName(), partition.getTableName(), table.getPartitionKeys());
+  }
+
   public static Properties getSchema(
       org.apache.hadoop.hive.metastore.api.Partition part,
       org.apache.hadoop.hive.metastore.api.Table table) {
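
Note (illustrative, not part of the patch): the new getPartitionSchema helper passes the partition's own StorageDescriptor for both SD arguments of the six-argument getSchema overload partially visible in the hunk above, so serde name, serde properties, and column metadata all come from the partition rather than being mixed with table-level metadata. A minimal Java sketch of the difference this makes, assuming a Partition part and Table table already fetched from the metastore; the "serialization.lib" key is the standard Hive schema property:

    import java.util.Properties;
    import org.apache.hadoop.hive.metastore.MetaStoreUtils;
    import org.apache.hadoop.hive.metastore.api.Partition;
    import org.apache.hadoop.hive.metastore.api.Table;

    // Hedged sketch, not part of the patch: after "alter table ... set serde",
    // the table-level and partition-level schemas can disagree on the serde.
    static String[] serdes(Partition part, Table table) {
      Properties fromTable = MetaStoreUtils.getSchema(part, table);          // mixes in table metadata
      Properties fromPart = MetaStoreUtils.getPartitionSchema(part, table);  // partition SD only
      return new String[] {
          fromTable.getProperty("serialization.lib"),   // serde the table now declares
          fromPart.getProperty("serialization.lib")};   // serde the partition was written with
    }
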
Index: ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out
===================================================================
--- ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out	(working copy)
@@ -0,0 +1,75 @@
+PREHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but
+-- also different serdes
+create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but
+-- also different serdes
+create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partition_test_partitioned@dt=1
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partition_test_partitioned@dt=1
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partition_test_partitioned@dt=2
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partition_test_partitioned@dt=2
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=1
+PREHOOK: Input: default@partition_test_partitioned@dt=2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=1
+POSTHOOK: Input: default@partition_test_partitioned@dt=2
+#### A masked pattern was here ####
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	val_0	1
+0	val_0	1
+0	val_0	1
+0	val_0	2
+0	val_0	2
+0	val_0	2
+10	val_10	1
+10	val_10	2
+100	val_100	1
+100	val_100	1
+100	val_100	2
+100	val_100	2
+103	val_103	1
+103	val_103	1
+103	val_103	2
+103	val_103	2
+104	val_104	1
+104	val_104	1
+104	val_104	2
+104	val_104	2
Index: ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out
===================================================================
--- ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out	(working copy)
@@ -0,0 +1,105 @@
+PREHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but
+-- also different serdes
+create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but
+-- also different serdes
+create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partition_test_partitioned@dt=1
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partition_test_partitioned@dt=1
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: alter table partition_test_partitioned set fileformat sequencefile
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set fileformat sequencefile
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partition_test_partitioned@dt=2
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partition_test_partitioned@dt=2
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='3') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partition_test_partitioned@dt=3
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='3') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partition_test_partitioned@dt=3
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partition_test_partitioned@dt=1
+PREHOOK: Input: default@partition_test_partitioned@dt=2
+PREHOOK: Input: default@partition_test_partitioned@dt=3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partition_test_partitioned@dt=1
+POSTHOOK: Input: default@partition_test_partitioned@dt=2
+POSTHOOK: Input: default@partition_test_partitioned@dt=3
+#### A masked pattern was here ####
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0	val_0	1
+0	val_0	1
+0	val_0	1
+0	val_0	2
+0	val_0	2
+0	val_0	2
+0	val_0	3
+0	val_0	3
+0	val_0	3
+10	val_10	1
+10	val_10	2
+10	val_10	3
+100	val_100	1
+100	val_100	1
+100	val_100	2
+100	val_100	2
+100	val_100	3
+100	val_100	3
+103	val_103	1
+103	val_103	1
Index: ql/src/test/queries/clientpositive/partition_wise_fileformat8.q
===================================================================
--- ql/src/test/queries/clientpositive/partition_wise_fileformat8.q	(revision 0)
+++ ql/src/test/queries/clientpositive/partition_wise_fileformat8.q	(working copy)
@@ -0,0 +1,12 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+-- This tests that a query can span multiple partitions which can not only have different file formats, but
+-- also different serdes
+create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile;
+insert overwrite table partition_test_partitioned partition(dt='1') select * from src;
+alter table partition_test_partitioned set fileformat sequencefile;
+insert overwrite table partition_test_partitioned partition(dt='2') select * from src;
+alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe';
+insert overwrite table partition_test_partitioned partition(dt='3') select * from src;
+
+select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20;
Index: ql/src/test/queries/clientpositive/partition_wise_fileformat9.q
===================================================================
--- ql/src/test/queries/clientpositive/partition_wise_fileformat9.q	(revision 0)
+++ ql/src/test/queries/clientpositive/partition_wise_fileformat9.q	(working copy)
@@ -0,0 +1,10 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+-- This tests that a query can span multiple partitions which can not only have different file formats, but
+-- also different serdes
+create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile;
+insert overwrite table partition_test_partitioned partition(dt='1') select * from src;
+alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
+insert overwrite table partition_test_partitioned partition(dt='2') select * from src;
+
+select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20;
Index: ql/src/test/queries/clientpositive/partition_wise_fileformat10.q
===================================================================
--- ql/src/test/queries/clientpositive/partition_wise_fileformat10.q	(revision 0)
+++ ql/src/test/queries/clientpositive/partition_wise_fileformat10.q	(working copy)
@@ -0,0 +1,13 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+-- This tests that the schema can be changed for binary serde data
+create table prt(key string, value string) partitioned by (dt string);
+insert overwrite table prt partition(dt='1') select * from src where key = 238;
+
+select * from prt where dt is not null;
+select key+key, value from prt where dt is not null;
+
+alter table prt add columns (value2 string);
+
+select key+key, value from prt where dt is not null;
+select * from prt where dt is not null;
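
Note (illustrative, not part of the patch): partition_wise_fileformat10.q above and partition_wise_fileformat11.q below both exercise schema evolution, and what they verify hinges on the schema Properties the deserializer is initialized with rather than on the stored bytes. A hedged sketch of the property shape involved, using the standard Hive schema keys:

    import java.util.Properties;

    // Hedged sketch: the kind of schema Properties a columnar serde is
    // initialized with. Whichever layer supplies the column list, a column
    // absent from the bytes actually stored in a partition (here a
    // hypothetical value2 added after the data was written) must
    // deserialize as NULL, which is what these tests assert.
    Properties schema = new Properties();
    schema.setProperty("columns", "key,value,value2");
    schema.setProperty("columns.types", "string:string:string");
    schema.setProperty("serialization.lib",
        "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe");
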
Index: ql/src/test/queries/clientpositive/partition_wise_fileformat11.q
===================================================================
--- ql/src/test/queries/clientpositive/partition_wise_fileformat11.q	(revision 0)
+++ ql/src/test/queries/clientpositive/partition_wise_fileformat11.q	(working copy)
@@ -0,0 +1,15 @@
+set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+-- This tests that the schema can be changed for binary serde data
+create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile;
+alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe';
+insert overwrite table partition_test_partitioned partition(dt='1') select * from src where key = 238;
+
+select * from partition_test_partitioned where dt is not null;
+select key+key, value from partition_test_partitioned where dt is not null;
+
+alter table partition_test_partitioned change key key int;
+alter table partition_test_partitioned add columns (value2 string);
+
+select key+key, value from partition_test_partitioned where dt is not null;
+select * from partition_test_partitioned where dt is not null;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(revision 1426147)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java	(working copy)
@@ -793,7 +793,8 @@
       partDir.add(p);
       try {
-        partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part));
+        // partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part));
+        partDesc.add(Utilities.getPartitionDesc(part));
       } catch (HiveException e) {
         LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e));
         throw new SemanticException(e.getMessage(), e);
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java	(revision 1426147)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java	(working copy)
@@ -214,7 +214,7 @@
     getInputFormatClass();
     // This will set up field: outputFormatClass
     getOutputFormatClass();
-
+    getDeserializer();
   }
 
   public String getName() {
@@ -276,6 +276,10 @@
     return MetaStoreUtils.getSchema(tPartition, table.getTTable());
   }
 
+  public Properties getSchemaFromPartitionSchema() {
+    return MetaStoreUtils.getPartitionSchema(tPartition, table.getTTable());
+  }
+
   public Properties getSchemaFromTableSchema(Properties tblSchema) {
     return MetaStoreUtils.getPartSchemaFromTableSchema(tPartition.getSd(), table.getTTable().getSd(),
         tPartition.getParameters(), table.getDbName(), table.getTableName(), table.getPartitionKeys(),
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java	(revision 1426147)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java	(working copy)
@@ -87,7 +87,7 @@
   public PartitionDesc(final org.apache.hadoop.hive.ql.metadata.Partition part)
       throws HiveException {
     tableDesc = Utilities.getTableDesc(part.getTable());
-    properties = part.getSchema();
+    properties = part.getSchemaFromPartitionSchema();
     partSpec = part.getSpec();
     deserializerClass = part.getDeserializer(properties).getClass();
     inputFileFormatClass = part.getInputFormatClass();
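
Note (illustrative, not part of the patch): with the PartitionDesc constructor change above, every map-plan partition descriptor now derives its properties, and therefore its deserializer class, from the partition-level schema. A minimal sketch of the consequence, assuming the usual Hive metadata entry points and hypothetical partition specs:

    import java.util.Collections;
    import java.util.Map;
    import org.apache.hadoop.hive.ql.metadata.Hive;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.metadata.Table;
    import org.apache.hadoop.hive.ql.plan.PartitionDesc;

    // Hedged sketch, not part of the patch: two partitions of one table may
    // now surface different deserializers, which is exactly what
    // CombineHiveInputFormat keys its pools on in the next file.
    static boolean serdesMayDiffer(Hive hive, Table tbl) throws HiveException {
      Map<String, String> spec1 = Collections.singletonMap("dt", "1"); // written with the old serde
      Map<String, String> spec2 = Collections.singletonMap("dt", "2"); // written after SET SERDE
      PartitionDesc d1 = new PartitionDesc(hive.getPartition(tbl, spec1, false));
      PartitionDesc d2 = new PartitionDesc(hive.getPartition(tbl, spec2, false));
      return d1.getDeserializerClass() != d2.getDeserializerClass();
    }
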
Index: ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java	(revision 1426147)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java	(working copy)
@@ -226,22 +226,27 @@
   private static class CombinePathInputFormat {
     private final List<Operator<? extends OperatorDesc>> opList;
    private final String inputFormatClassName;
+    private final String deserializerClassName;
 
     public CombinePathInputFormat(List<Operator<? extends OperatorDesc>> opList,
-        String inputFormatClassName) {
+        String inputFormatClassName,
+        String deserializerClassName) {
       this.opList = opList;
       this.inputFormatClassName = inputFormatClassName;
+      this.deserializerClassName = deserializerClassName;
     }
 
     @Override
     public boolean equals(Object o) {
       if (o instanceof CombinePathInputFormat) {
-        CombinePathInputFormat mObj = (CombinePathInputFormat)o;
+        CombinePathInputFormat mObj = (CombinePathInputFormat) o;
         if (mObj == null) {
           return false;
         }
-        return opList.equals(mObj.opList) &&
-          inputFormatClassName.equals(mObj.inputFormatClassName);
+        return (opList.equals(mObj.opList)) &&
+            (inputFormatClassName.equals(mObj.inputFormatClassName)) &&
+            (deserializerClassName == null ? (mObj.deserializerClassName == null) :
+            deserializerClassName.equals(mObj.deserializerClassName));
       }
       return false;
     }
@@ -296,6 +301,8 @@
       Class inputFormatClass = part.getInputFileFormatClass();
       String inputFormatClassName = inputFormatClass.getName();
       InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
+      String deserializerClassName = part.getDeserializerClass() == null ? null
+          : part.getDeserializerClass().getName();
 
       // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not,
       // we use a configuration variable for the same
@@ -342,12 +349,24 @@
       // Does a pool exist for this path already
       CombineFilter f = null;
       List<Operator<? extends OperatorDesc>> opList = null;
-      boolean done = false;
 
       if (!mrwork.isMapperCannotSpanPartns()) {
         opList = HiveFileFormatUtils.doGetWorksFromPath(
           pathToAliases, aliasToWork, filterPath);
-        f = poolMap.get(new CombinePathInputFormat(opList, inputFormatClassName));
+        CombinePathInputFormat combinePathInputFormat =
+            new CombinePathInputFormat(opList, inputFormatClassName, deserializerClassName);
+        f = poolMap.get(combinePathInputFormat);
+        if (f == null) {
+          f = new CombineFilter(filterPath);
+          LOG.info("CombineHiveInputSplit creating pool for " + path +
+              "; using filter path " + filterPath);
+          combine.createPool(job, f);
+          poolMap.put(combinePathInputFormat, f);
+        } else {
+          LOG.info("CombineHiveInputSplit: pool is already created for " + path +
+              "; using filter path " + filterPath);
+          f.addPath(filterPath);
+        }
       } else {
         // In the case of tablesample, the input paths are pointing to files rather than directories.
         // We need to get the parent directory as the filtering path so that all files in the same
@@ -361,24 +380,7 @@
         } else {
           inpDirs.add(path);
         }
-        done = true;
       }
-
-      if (!done) {
-        if (f == null) {
-          f = new CombineFilter(filterPath);
-          LOG.info("CombineHiveInputSplit creating pool for " + path +
-              "; using filter path " + filterPath);
-          combine.createPool(job, f);
-          if (!mrwork.isMapperCannotSpanPartns()) {
-            poolMap.put(new CombinePathInputFormat(opList, inputFormatClassName), f);
-          }
-        } else {
-          LOG.info("CombineHiveInputSplit: pool is already created for " + path +
-              "; using filter path " + filterPath);
-          f.addPath(filterPath);
-        }
-      }
     }
 
     // Processing directories
Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java	(revision 1426147)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java	(working copy)
@@ -405,7 +405,7 @@
   }
 
   /**
-   * Get the list of operatators from the opeerator tree that are needed for the path
+   * Get the list of operators from the operator tree that are needed for the path
    * @param pathToAliases  mapping from path to aliases
   * @param aliasToWork    The operator tree to be invoked for a given alias
   * @param dir            The path to look for