Index: metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
===================================================================
--- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java	(revision 1426393)
+++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java	(working copy)
@@ -1286,7 +1286,7 @@
       return null;
     }
     return new Partition(mpart.getValues(), dbName, tblName, mpart.getCreateTime(),
-        mpart.getLastAccessTime(), convertToStorageDescriptor(mpart.getSd(), true),
+        mpart.getLastAccessTime(), convertToStorageDescriptor(mpart.getSd(), false),
         mpart.getParameters());
   }
 
@@ -1681,7 +1681,7 @@
       query.setOrdering("partitionName ascending");
       List<MPartition> mparts = (List<MPartition>) query.executeWithMap(params);
-      // pm.retrieveAll(mparts); // retrieveAll is pessimistic. some fields may not be needed
+      pm.retrieveAll(mparts); // retrieveAll is pessimistic. some fields may not be needed
       List<Partition> results = convertToParts(dbName, tblName, mparts);
       // pm.makeTransientAll(mparts); // makeTransient will prohibit future access of unfetched fields
       query.closeAll();
Index: metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
===================================================================
--- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java	(revision 1426393)
+++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java	(working copy)
@@ -237,7 +237,7 @@
     String lib = part.getSd().getSerdeInfo().getSerializationLib();
     try {
       Deserializer deserializer = SerDeUtils.lookupDeserializer(lib);
-      deserializer.initialize(conf, MetaStoreUtils.getSchema(part, table));
+      deserializer.initialize(conf, MetaStoreUtils.getPartitionSchema(part, table));
       return deserializer;
     } catch (RuntimeException e) {
       throw e;
@@ -497,6 +497,15 @@
         .getParameters(), table.getDbName(), table.getTableName(),
         table.getPartitionKeys());
   }
 
+  public static Properties getPartitionSchema(
+      org.apache.hadoop.hive.metastore.api.Partition partition,
+      org.apache.hadoop.hive.metastore.api.Table table) {
+    return MetaStoreUtils
+        .getSchema(partition.getSd(), partition.getSd(), partition
+            .getParameters(), partition.getDbName(), partition.getTableName(),
+            table.getPartitionKeys());
+  }
+
   public static Properties getSchema(
       org.apache.hadoop.hive.metastore.api.Partition part,
       org.apache.hadoop.hive.metastore.api.Table table) {
Index: common/src/java/org/apache/hadoop/hive/common/ObjectPair.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/common/ObjectPair.java	(revision 0)
+++ common/src/java/org/apache/hadoop/hive/common/ObjectPair.java	(working copy)
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common;
+
+public class ObjectPair<F, S> {
+  private F first;
+  private S second;
+
+  public ObjectPair() {}
+
+  public ObjectPair(F first, S second) {
+    this.first = first;
+    this.second = second;
+  }
+
+  public F getFirst() {
+    return first;
+  }
+
+  public void setFirst(F first) {
+    this.first = first;
+  }
+
+  public S getSecond() {
+    return second;
+  }
+
+  public void setSecond(S second) {
+    this.second = second;
+  }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableStructObjectInspector.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableStructObjectInspector.java	(revision 1426393)
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableStructObjectInspector.java	(working copy)
@@ -34,4 +34,9 @@
    */
   public abstract Object setStructFieldData(Object struct, StructField field,
       Object fieldValue);
+
+  @Override
+  public boolean isSettable() {
+    return true;
+  }
 }
Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructObjectInspector.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructObjectInspector.java	(revision 1426393)
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructObjectInspector.java	(working copy)
@@ -47,6 +47,10 @@
    */
   public abstract List<Object> getStructFieldsDataAsList(Object data);
 
+  public boolean isSettable() {
+    return false;
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java	(revision 1426393)
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java	(working copy)
@@ -22,10 +22,11 @@
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableByteObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector;
@@ -33,8 +34,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
 
 /**
  * ObjectInspectorConverters.
@@ -59,6 +60,61 @@
     }
   }
 
+  private static Converter getConverter(PrimitiveObjectInspector inputOI,
+      PrimitiveObjectInspector outputOI) {
+    switch (outputOI.getPrimitiveCategory()) {
+    case BOOLEAN:
+      return new PrimitiveObjectInspectorConverter.BooleanConverter(
+          inputOI,
+          (SettableBooleanObjectInspector) outputOI);
+    case BYTE:
+      return new PrimitiveObjectInspectorConverter.ByteConverter(
+          inputOI,
+          (SettableByteObjectInspector) outputOI);
+    case SHORT:
+      return new PrimitiveObjectInspectorConverter.ShortConverter(
+          inputOI,
+          (SettableShortObjectInspector) outputOI);
+    case INT:
+      return new PrimitiveObjectInspectorConverter.IntConverter(
+          inputOI,
+          (SettableIntObjectInspector) outputOI);
+    case LONG:
+      return new PrimitiveObjectInspectorConverter.LongConverter(
+          inputOI,
+          (SettableLongObjectInspector) outputOI);
+    case FLOAT:
+      return new PrimitiveObjectInspectorConverter.FloatConverter(
+          inputOI,
+          (SettableFloatObjectInspector) outputOI);
+    case DOUBLE:
+      return new PrimitiveObjectInspectorConverter.DoubleConverter(
+          inputOI,
+          (SettableDoubleObjectInspector) outputOI);
+    case STRING:
+      if (outputOI instanceof WritableStringObjectInspector) {
+        return new PrimitiveObjectInspectorConverter.TextConverter(
+            inputOI);
+      } else if (outputOI instanceof JavaStringObjectInspector) {
+        return new PrimitiveObjectInspectorConverter.StringConverter(
+            inputOI);
+      }
+    case TIMESTAMP:
+      return new PrimitiveObjectInspectorConverter.TimestampConverter(
+          inputOI,
+          (SettableTimestampObjectInspector) outputOI);
+    case BINARY:
+      return new PrimitiveObjectInspectorConverter.BinaryConverter(
+          inputOI,
+          (SettableBinaryObjectInspector)outputOI);
+
+    default:
+      throw new RuntimeException("Hive internal error: conversion of "
+          + inputOI.getTypeName() + " to " + outputOI.getTypeName()
+          + " not supported yet.");
+    }
+  }
+
   /**
    * Returns a converter that converts objects from one OI to another OI. The
    * returned (converted) object belongs to this converter, so that it can be
@@ -73,57 +129,7 @@
     }
     switch (outputOI.getCategory()) {
     case PRIMITIVE:
-      switch (((PrimitiveObjectInspector) outputOI).getPrimitiveCategory()) {
-      case BOOLEAN:
-        return new PrimitiveObjectInspectorConverter.BooleanConverter(
-            (PrimitiveObjectInspector) inputOI,
-            (SettableBooleanObjectInspector) outputOI);
-      case BYTE:
-        return new PrimitiveObjectInspectorConverter.ByteConverter(
-            (PrimitiveObjectInspector) inputOI,
-            (SettableByteObjectInspector) outputOI);
-      case SHORT:
-        return new PrimitiveObjectInspectorConverter.ShortConverter(
-            (PrimitiveObjectInspector) inputOI,
-            (SettableShortObjectInspector) outputOI);
-      case INT:
-        return new PrimitiveObjectInspectorConverter.IntConverter(
-            (PrimitiveObjectInspector) inputOI,
-            (SettableIntObjectInspector) outputOI);
-      case LONG:
-        return new PrimitiveObjectInspectorConverter.LongConverter(
-            (PrimitiveObjectInspector) inputOI,
-            (SettableLongObjectInspector) outputOI);
-      case FLOAT:
-        return new PrimitiveObjectInspectorConverter.FloatConverter(
-            (PrimitiveObjectInspector) inputOI,
-            (SettableFloatObjectInspector) outputOI);
-      case DOUBLE:
-        return new PrimitiveObjectInspectorConverter.DoubleConverter(
-            (PrimitiveObjectInspector) inputOI,
-            (SettableDoubleObjectInspector) outputOI);
-      case STRING:
-        if (outputOI instanceof WritableStringObjectInspector) {
-          return new PrimitiveObjectInspectorConverter.TextConverter(
-              (PrimitiveObjectInspector) inputOI);
-        } else if (outputOI instanceof JavaStringObjectInspector) {
-          return new PrimitiveObjectInspectorConverter.StringConverter(
-              (PrimitiveObjectInspector) inputOI);
-        }
-      case TIMESTAMP:
-        return new PrimitiveObjectInspectorConverter.TimestampConverter(
-            (PrimitiveObjectInspector) inputOI,
-            (SettableTimestampObjectInspector) outputOI);
-      case BINARY:
-        return new PrimitiveObjectInspectorConverter.BinaryConverter(
-            (PrimitiveObjectInspector)inputOI,
-            (SettableBinaryObjectInspector)outputOI);
-
-      default:
-        throw new RuntimeException("Hive internal error: conversion of "
-            + inputOI.getTypeName() + " to " + outputOI.getTypeName()
-            + " not supported yet.");
-      }
+      return getConverter((PrimitiveObjectInspector) inputOI, (PrimitiveObjectInspector) outputOI);
     case STRUCT:
       return new StructConverter(inputOI,
           (SettableStructObjectInspector) outputOI);
@@ -141,6 +147,62 @@
   }
 
   /**
+   * Returns a converter that converts objects from one OI to another OI. The
+   * returned (converted) object belongs to this converter, so that it can be
+   * reused across different calls.
+   */
+  public static ObjectPair<ObjectInspector, Converter> getConverterOutputOI(
+      ObjectInspector inputOI,
+      ObjectInspector outputOI,
+      boolean identityConverterOK) {
+    // If the inputOI is the same as the outputOI, just return an
+    // IdentityConverter.
+    if (identityConverterOK && (inputOI == outputOI)) {
+      return new ObjectPair<ObjectInspector, Converter>(outputOI, new IdentityConverter());
+    }
+    switch (outputOI.getCategory()) {
+    case PRIMITIVE:
+      return new ObjectPair<ObjectInspector, Converter>(outputOI, getConverter(
+          (PrimitiveObjectInspector) inputOI, (PrimitiveObjectInspector) outputOI));
+    case STRUCT:
+      StructObjectInspector structOutputOI = (StructObjectInspector) outputOI;
+      if (structOutputOI.isSettable()) {
+        return new ObjectPair<ObjectInspector, Converter>(outputOI,
+            (Converter) (new StructConverter(inputOI, (SettableStructObjectInspector) outputOI)));
+      }
+      else {
+        // create a standard settable struct object inspector
+        List<? extends StructField> listFields = structOutputOI.getAllStructFieldRefs();
+        List<String> structFieldNames = new ArrayList<String>(listFields.size());
+        List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(
+            listFields.size());
+
+        for (StructField listField : listFields) {
+          structFieldNames.add(listField.getFieldName());
+          structFieldObjectInspectors.add(listField.getFieldObjectInspector());
+        }
+
+        StandardStructObjectInspector structStandardOutputOI = ObjectInspectorFactory
+            .getStandardStructObjectInspector(
+                structFieldNames,
+                structFieldObjectInspectors);
+        return new ObjectPair<ObjectInspector, Converter>(structStandardOutputOI,
+            getConverter(inputOI, structStandardOutputOI));
+      }
+    case LIST:
+      return new ObjectPair<ObjectInspector, Converter>(outputOI, new ListConverter(inputOI,
+          (SettableListObjectInspector) outputOI));
+    case MAP:
+      return new ObjectPair<ObjectInspector, Converter>(outputOI, new MapConverter(inputOI,
+          (SettableMapObjectInspector) outputOI));
+    default:
+      throw new RuntimeException("Hive internal error: conversion of "
+          + inputOI.getTypeName() + " to " + outputOI.getTypeName()
+          + " not supported yet.");
+    }
+  }
+
+  /**
    * A converter class for List.
    */
   public static class ListConverter implements Converter {
@@ -221,10 +283,11 @@
       this.outputOI = outputOI;
       inputFields = this.inputOI.getAllStructFieldRefs();
       outputFields = outputOI.getAllStructFieldRefs();
-      assert (inputFields.size() == outputFields.size());
-      fieldConverters = new ArrayList<Converter>(inputFields.size());
-      for (int f = 0; f < inputFields.size(); f++) {
+      // If the output has some extra fields, set them to NULL.
+      int minFields = Math.min(inputFields.size(), outputFields.size());
+      fieldConverters = new ArrayList<Converter>(minFields);
+      for (int f = 0; f < minFields; f++) {
         fieldConverters.add(getConverter(inputFields.get(f)
             .getFieldObjectInspector(), outputFields.get(f)
             .getFieldObjectInspector()));
@@ -243,15 +306,19 @@
         return null;
       }
 
+      int minFields = Math.min(inputFields.size(), outputFields.size());
       // Convert the fields
-      for (int f = 0; f < inputFields.size(); f++) {
-        Object inputFieldValue = inputOI.getStructFieldData(input, inputFields
-            .get(f));
-        Object outputFieldValue = fieldConverters.get(f).convert(
-            inputFieldValue);
-        outputOI.setStructFieldData(output, outputFields.get(f),
-            outputFieldValue);
+      for (int f = 0; f < minFields; f++) {
+        Object inputFieldValue = inputOI.getStructFieldData(input, inputFields.get(f));
+        Object outputFieldValue = fieldConverters.get(f).convert(inputFieldValue);
+        outputOI.setStructFieldData(output, outputFields.get(f), outputFieldValue);
       }
+
+      // set the extra fields to null
+      for (int f = minFields; f < outputFields.size(); f++) {
+        outputOI.setStructFieldData(output, outputFields.get(f), null);
+      }
+
       return output;
     }
   }
Index: ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out
===================================================================
--- ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/partition_wise_fileformat9.q.out	(working copy)
@@ -0,0 +1,109 @@
+PREHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but
+-- also different serdes
+create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but
+-- also different serdes
+create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@partition_test_partitioned
+PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@partition_test_partitioned@dt=1
+POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@partition_test_partitioned@dt=1
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+PREHOOK: type: ALTERTABLE_SERIALIZER
+PREHOOK: Input: default@partition_test_partitioned
+PREHOOK: Output: default@partition_test_partitioned
+POSTHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
+POSTHOOK: type: ALTERTABLE_SERIALIZER
+POSTHOOK: Input: default@partition_test_partitioned
+POSTHOOK: Output: default@partition_test_partitioned
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK:
Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_test_partitioned@dt=2 +POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_test_partitioned@dt=2 +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +PREHOOK: Input: default@partition_test_partitioned@dt=2 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +POSTHOOK: Input: default@partition_test_partitioned@dt=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 1 +0 val_0 1 +0 val_0 1 +0 val_0 2 +0 val_0 2 +0 val_0 2 +10 val_10 1 +10 val_10 2 +100 val_100 1 +100 val_100 1 +100 val_100 2 +100 val_100 2 +103 val_103 1 +103 val_103 1 +103 val_103 2 +103 val_103 2 +104 val_104 1 +104 val_104 1 +104 val_104 2 +104 val_104 2 +PREHOOK: query: select key+key as key, value, dt from partition_test_partitioned where dt is not null order by key, value, dt limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +PREHOOK: Input: default@partition_test_partitioned@dt=2 +#### A masked pattern was here #### +POSTHOOK: query: select key+key as key, value, dt from partition_test_partitioned where dt is not null order by key, value, dt limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +POSTHOOK: Input: default@partition_test_partitioned@dt=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0.0 val_0 1 +0.0 val_0 1 +0.0 val_0 1 +0.0 val_0 2 +0.0 val_0 2 +0.0 val_0 2 +4.0 val_2 1 +4.0 val_2 2 +8.0 val_4 1 +8.0 val_4 2 +10.0 val_5 1 +10.0 val_5 1 +10.0 val_5 1 +10.0 val_5 2 +10.0 val_5 2 +10.0 val_5 2 +16.0 val_8 1 +16.0 val_8 2 +18.0 val_9 1 +18.0 val_9 2 Index: ql/src/test/results/clientpositive/partition_wise_fileformat12.q.out =================================================================== --- ql/src/test/results/clientpositive/partition_wise_fileformat12.q.out (revision 0) +++ ql/src/test/results/clientpositive/partition_wise_fileformat12.q.out (working copy) @@ -0,0 +1,208 @@ +PREHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@partition_test_partitioned +PREHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@partition_test_partitioned +PREHOOK: Output: default@partition_test_partitioned +POSTHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@partition_test_partitioned +POSTHOOK: Output: default@partition_test_partitioned +PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src where key = 238 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_test_partitioned@dt=1 +POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src where key = 238 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_test_partitioned@dt=1 +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 1 +238 val_238 1 +PREHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select key+key, value from 
partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +476.0 val_238 +476.0 val_238 +PREHOOK: query: alter table partition_test_partitioned change key key int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partition_test_partitioned +PREHOOK: Output: default@partition_test_partitioned +POSTHOOK: query: alter table partition_test_partitioned change key key int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partition_test_partitioned +POSTHOOK: Output: default@partition_test_partitioned +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +476 val_238 +476 val_238 +PREHOOK: query: select * from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 1 +238 val_238 1 +PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src where key = 97 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_test_partitioned@dt=2 +POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src where key = 97 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_test_partitioned@dt=2 +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: alter table partition_test_partitioned add columns (value2 string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partition_test_partitioned +PREHOOK: Output: default@partition_test_partitioned +POSTHOOK: query: alter table partition_test_partitioned add columns (value2 string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partition_test_partitioned +POSTHOOK: Output: default@partition_test_partitioned +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +PREHOOK: Input: default@partition_test_partitioned@dt=2 +#### A masked pattern was here #### +POSTHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +POSTHOOK: Input: default@partition_test_partitioned@dt=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +476 val_238 +476 val_238 +194 val_97 +194 val_97 +PREHOOK: query: select * from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +PREHOOK: Input: default@partition_test_partitioned@dt=2 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +POSTHOOK: Input: default@partition_test_partitioned@dt=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 NULL 1 +238 val_238 NULL 1 +97 val_97 NULL 2 +97 val_97 NULL 2 +PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='3') select key, value, 
value from src where key = 200 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_test_partitioned@dt=3 +POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='3') select key, value, value from src where key = 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_test_partitioned@dt=3 +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key+key, value, value2 from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +PREHOOK: Input: default@partition_test_partitioned@dt=2 +PREHOOK: Input: default@partition_test_partitioned@dt=3 +#### A masked pattern was here #### +POSTHOOK: query: select key+key, value, value2 from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +POSTHOOK: Input: default@partition_test_partitioned@dt=2 +POSTHOOK: Input: default@partition_test_partitioned@dt=3 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +476 val_238 NULL +476 val_238 NULL +194 val_97 NULL +194 val_97 NULL +400 val_200 val_200 +400 val_200 val_200 +PREHOOK: query: select * from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +PREHOOK: Input: default@partition_test_partitioned@dt=2 +PREHOOK: Input: default@partition_test_partitioned@dt=3 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_test_partitioned where dt is not null 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +POSTHOOK: Input: default@partition_test_partitioned@dt=2 +POSTHOOK: Input: default@partition_test_partitioned@dt=3 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 NULL 1 +238 val_238 NULL 1 +97 val_97 NULL 2 +97 val_97 NULL 2 +200 val_200 val_200 3 +200 val_200 val_200 3 Index: ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out =================================================================== --- ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out (revision 0) +++ ql/src/test/results/clientpositive/partition_wise_fileformat8.q.out (working copy) @@ -0,0 +1,143 @@ +PREHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but +-- also different serdes +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests that a query can span multiple partitions which can not only have different file formats, but +-- also different serdes +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@partition_test_partitioned +PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_test_partitioned@dt=1 +POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_test_partitioned@dt=1 +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: alter table partition_test_partitioned set fileformat sequencefile +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@partition_test_partitioned +PREHOOK: Output: default@partition_test_partitioned +POSTHOOK: query: alter table partition_test_partitioned set fileformat sequencefile +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@partition_test_partitioned +POSTHOOK: Output: default@partition_test_partitioned +POSTHOOK: Lineage: partition_test_partitioned 
PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_test_partitioned@dt=2 +POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='2') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_test_partitioned@dt=2 +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@partition_test_partitioned +PREHOOK: Output: default@partition_test_partitioned +POSTHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@partition_test_partitioned +POSTHOOK: Output: default@partition_test_partitioned +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='3') select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_test_partitioned@dt=3 +POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='3') select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_test_partitioned@dt=3 +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +PREHOOK: Input: default@partition_test_partitioned@dt=2 +PREHOOK: Input: default@partition_test_partitioned@dt=3 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +POSTHOOK: Input: default@partition_test_partitioned@dt=2 +POSTHOOK: Input: default@partition_test_partitioned@dt=3 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 1 +0 val_0 1 +0 val_0 1 +0 val_0 2 +0 val_0 2 +0 val_0 2 +0 val_0 3 +0 val_0 3 +0 val_0 3 +10 val_10 1 +10 val_10 2 +10 val_10 3 +100 val_100 1 +100 val_100 1 +100 val_100 2 +100 val_100 2 +100 val_100 3 +100 val_100 3 +103 val_103 1 +103 val_103 1 +PREHOOK: query: select key+key as key, value, dt from partition_test_partitioned where dt is not null order by key, value, dt limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +PREHOOK: Input: default@partition_test_partitioned@dt=2 +PREHOOK: Input: default@partition_test_partitioned@dt=3 +#### A masked pattern was here #### +POSTHOOK: query: select key+key as key, value, dt from partition_test_partitioned where dt is not null order by key, value, dt limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +POSTHOOK: Input: default@partition_test_partitioned@dt=2 +POSTHOOK: Input: default@partition_test_partitioned@dt=3 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0.0 val_0 1 +0.0 val_0 1 +0.0 val_0 1 +0.0 val_0 2 +0.0 val_0 2 +0.0 val_0 2 +0.0 val_0 3 +0.0 val_0 3 +0.0 
val_0 3 +4.0 val_2 1 +4.0 val_2 2 +4.0 val_2 3 +8.0 val_4 1 +8.0 val_4 2 +8.0 val_4 3 +10.0 val_5 1 +10.0 val_5 1 +10.0 val_5 1 +10.0 val_5 2 +10.0 val_5 2 Index: ql/src/test/results/clientpositive/partition_wise_fileformat11.q.out =================================================================== --- ql/src/test/results/clientpositive/partition_wise_fileformat11.q.out (revision 0) +++ ql/src/test/results/clientpositive/partition_wise_fileformat11.q.out (working copy) @@ -0,0 +1,117 @@ +PREHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@partition_test_partitioned +PREHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@partition_test_partitioned +PREHOOK: Output: default@partition_test_partitioned +POSTHOOK: query: alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@partition_test_partitioned +POSTHOOK: Output: default@partition_test_partitioned +PREHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src where key = 238 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_test_partitioned@dt=1 +POSTHOOK: query: insert overwrite table partition_test_partitioned partition(dt='1') select * from src where key = 238 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_test_partitioned@dt=1 +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 1 +238 val_238 1 +PREHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +476.0 val_238 +476.0 val_238 +PREHOOK: query: alter table partition_test_partitioned change key key int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partition_test_partitioned +PREHOOK: Output: default@partition_test_partitioned +POSTHOOK: query: alter table partition_test_partitioned change key key int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partition_test_partitioned +POSTHOOK: Output: default@partition_test_partitioned +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +476 val_238 +476 val_238 +PREHOOK: query: select * from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 1 +238 val_238 1 +PREHOOK: query: alter table partition_test_partitioned add columns (value2 string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partition_test_partitioned +PREHOOK: Output: default@partition_test_partitioned +POSTHOOK: query: alter table partition_test_partitioned add columns (value2 string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partition_test_partitioned +POSTHOOK: Output: default@partition_test_partitioned +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select key+key, value from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +476 val_238 +476 val_238 +PREHOOK: query: select * from partition_test_partitioned where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_test_partitioned where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_test_partitioned@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_test_partitioned PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 NULL 1 +238 val_238 NULL 1 Index: ql/src/test/results/clientpositive/partition_wise_fileformat10.q.out =================================================================== --- ql/src/test/results/clientpositive/partition_wise_fileformat10.q.out (revision 0) +++ ql/src/test/results/clientpositive/partition_wise_fileformat10.q.out (working copy) @@ -0,0 +1,75 @@ +PREHOOK: query: -- This tests that the schema can be changed for binary serde data +create table prt(key string, value string) partitioned by (dt string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- This tests that the schema can be changed for binary serde data +create table prt(key string, value string) partitioned by (dt string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@prt +PREHOOK: query: insert overwrite table prt partition(dt='1') select * from src where key = 238 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@prt@dt=1 +POSTHOOK: query: insert overwrite table prt partition(dt='1') select * from src where key = 238 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@prt@dt=1 +POSTHOOK: Lineage: prt PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: prt PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from prt where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@prt@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from prt where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@prt@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: prt PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: prt PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 1 +238 val_238 1 +PREHOOK: query: select key+key, value from prt where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@prt@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select key+key, value from prt where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@prt@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: prt PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: prt PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +476.0 val_238 +476.0 val_238 +PREHOOK: query: alter table prt add columns (value2 string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: 
Input: default@prt +PREHOOK: Output: default@prt +POSTHOOK: query: alter table prt add columns (value2 string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@prt +POSTHOOK: Output: default@prt +POSTHOOK: Lineage: prt PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: prt PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key+key, value from prt where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@prt@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select key+key, value from prt where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@prt@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: prt PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: prt PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +476.0 val_238 +476.0 val_238 +PREHOOK: query: select * from prt where dt is not null +PREHOOK: type: QUERY +PREHOOK: Input: default@prt@dt=1 +#### A masked pattern was here #### +POSTHOOK: query: select * from prt where dt is not null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@prt@dt=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: prt PARTITION(dt=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: prt PARTITION(dt=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 NULL 1 +238 val_238 NULL 1 Index: ql/src/test/queries/clientpositive/partition_wise_fileformat8.q =================================================================== --- ql/src/test/queries/clientpositive/partition_wise_fileformat8.q (revision 0) +++ ql/src/test/queries/clientpositive/partition_wise_fileformat8.q (working copy) @@ -0,0 +1,13 @@ +set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; + +-- This tests that a query can span multiple partitions which can not only have different file formats, but +-- also different serdes +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile; +insert overwrite table partition_test_partitioned partition(dt='1') select * from src; +alter table partition_test_partitioned set fileformat sequencefile; +insert overwrite table partition_test_partitioned partition(dt='2') select * from src; +alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; +insert overwrite table partition_test_partitioned partition(dt='3') select * from src; + +select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20; +select key+key as key, value, dt from partition_test_partitioned where dt is not null order by key, value, dt limit 20; Index: ql/src/test/queries/clientpositive/partition_wise_fileformat12.q =================================================================== --- ql/src/test/queries/clientpositive/partition_wise_fileformat12.q (revision 0) +++ ql/src/test/queries/clientpositive/partition_wise_fileformat12.q (working copy) @@ -0,0 +1,26 @@ +set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; + +-- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile; +alter table partition_test_partitioned set serde 
'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; +insert overwrite table partition_test_partitioned partition(dt='1') select * from src where key = 238; + +select * from partition_test_partitioned where dt is not null; +select key+key, value from partition_test_partitioned where dt is not null; + +alter table partition_test_partitioned change key key int; + +select key+key, value from partition_test_partitioned where dt is not null; +select * from partition_test_partitioned where dt is not null; + +insert overwrite table partition_test_partitioned partition(dt='2') select * from src where key = 97; + +alter table partition_test_partitioned add columns (value2 string); + +select key+key, value from partition_test_partitioned where dt is not null; +select * from partition_test_partitioned where dt is not null; + +insert overwrite table partition_test_partitioned partition(dt='3') select key, value, value from src where key = 200; + +select key+key, value, value2 from partition_test_partitioned where dt is not null; +select * from partition_test_partitioned where dt is not null; Index: ql/src/test/queries/clientpositive/partition_wise_fileformat9.q =================================================================== --- ql/src/test/queries/clientpositive/partition_wise_fileformat9.q (revision 0) +++ ql/src/test/queries/clientpositive/partition_wise_fileformat9.q (working copy) @@ -0,0 +1,11 @@ +set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; + +-- This tests that a query can span multiple partitions which can not only have different file formats, but +-- also different serdes +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile; +insert overwrite table partition_test_partitioned partition(dt='1') select * from src; +alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; +insert overwrite table partition_test_partitioned partition(dt='2') select * from src; + +select * from partition_test_partitioned where dt is not null order by key, value, dt limit 20; +select key+key as key, value, dt from partition_test_partitioned where dt is not null order by key, value, dt limit 20; \ No newline at end of file Index: ql/src/test/queries/clientpositive/partition_wise_fileformat10.q =================================================================== --- ql/src/test/queries/clientpositive/partition_wise_fileformat10.q (revision 0) +++ ql/src/test/queries/clientpositive/partition_wise_fileformat10.q (working copy) @@ -0,0 +1,13 @@ +set hive.input.format = org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; + +-- This tests that the schema can be changed for binary serde data +create table prt(key string, value string) partitioned by (dt string); +insert overwrite table prt partition(dt='1') select * from src where key = 238; + +select * from prt where dt is not null; +select key+key, value from prt where dt is not null; + +alter table prt add columns (value2 string); + +select key+key, value from prt where dt is not null; +select * from prt where dt is not null; Index: ql/src/test/queries/clientpositive/partition_wise_fileformat11.q =================================================================== --- ql/src/test/queries/clientpositive/partition_wise_fileformat11.q (revision 0) +++ ql/src/test/queries/clientpositive/partition_wise_fileformat11.q (working copy) @@ -0,0 +1,19 @@ +set hive.input.format = 
org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; + +-- This tests that the schema can be changed for binary serde data +create table partition_test_partitioned(key string, value string) partitioned by (dt string) stored as rcfile; +alter table partition_test_partitioned set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; +insert overwrite table partition_test_partitioned partition(dt='1') select * from src where key = 238; + +select * from partition_test_partitioned where dt is not null; +select key+key, value from partition_test_partitioned where dt is not null; + +alter table partition_test_partitioned change key key int; + +select key+key, value from partition_test_partitioned where dt is not null; +select * from partition_test_partitioned where dt is not null; + +alter table partition_test_partitioned add columns (value2 string); + +select key+key, value from partition_test_partitioned where dt is not null; +select * from partition_test_partitioned where dt is not null; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -793,7 +793,7 @@ partDir.add(p); try { - partDesc.add(Utilities.getPartitionDescFromTableDesc(tblDesc, part)); + partDesc.add(Utilities.getPartitionDesc(part)); } catch (HiveException e) { LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); throw new SemanticException(e.getMessage(), e); Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (working copy) @@ -214,7 +214,7 @@ getInputFormatClass(); // This will set up field: outputFormatClass getOutputFormatClass(); - + getDeserializer(); } public String getName() { @@ -276,6 +276,10 @@ return MetaStoreUtils.getSchema(tPartition, table.getTTable()); } + public Properties getSchemaFromPartitionSchema() { + return MetaStoreUtils.getPartitionSchema(tPartition, table.getTTable()); + } + public Properties getSchemaFromTableSchema(Properties tblSchema) { return MetaStoreUtils.getPartSchemaFromTableSchema(tPartition.getSd(), table.getTTable().getSd(), tPartition.getParameters(), table.getDbName(), table.getTableName(), table.getPartitionKeys(), Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (working copy) @@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; @@ -45,6 +46,8 @@ import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; 
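
As an editorial aside (not part of the patch): the Converter imported above is what lets a row that was serialized under an older partition schema be handed to operators compiled against the current table schema. A minimal sketch of that idea is below, using only the long-standing ObjectInspectorConverters.getConverter API and standard (settable) struct inspectors; the class name KeyTypeChangeSketch and the literal row values are invented for the example. It mirrors the partition_wise_fileformat11/12 scenario, where the partition still stores key as a string but the table schema, after alter table ... change key key int, declares it as an int.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class KeyTypeChangeSketch {
  public static void main(String[] args) {
    // Partition data was written while "key" was still declared as a string.
    StructObjectInspector partOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));

    // The table schema now declares "key" as an int.
    StructObjectInspector tblOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaStringObjectInspector));

    // Built once per partition, then applied to every deserialized row.
    Converter partToTbl = ObjectInspectorConverters.getConverter(partOI, tblOI);

    List<Object> partRow = new ArrayList<Object>(Arrays.<Object>asList("238", "val_238"));
    Object tblRow = partToTbl.convert(partRow);

    // Prints [238, val_238]; "key" has been converted from String to Integer.
    System.out.println(tblOI.getStructFieldsDataAsList(tblRow));
  }
}

In the patch itself this wiring happens in initObjectInspector below, through the new getConverterOutputOI helper rather than getConverter directly, so that a settable copy of the table-side inspector can be produced when the table serde's own inspector is not settable.
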
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; @@ -78,7 +81,9 @@ private transient Writable[] vcValues; private transient List vcs; private transient Object[] rowWithPartAndVC; - private transient StructObjectInspector rowObjectInspector; + private transient StructObjectInspector tblRowObjectInspector; + // convert from partition to table schema + private transient Converter partTblObjectInspectorConverter; private transient boolean isPartitioned; private transient boolean hasVC; private Map opCtxMap; @@ -141,15 +146,16 @@ } private static class MapOpCtx { - boolean isPartitioned; - StructObjectInspector rawRowObjectInspector; // without partition - StructObjectInspector partObjectInspector; // partition - StructObjectInspector rowObjectInspector; - Object[] rowWithPart; - Object[] rowWithPartAndVC; - Deserializer deserializer; - public String tableName; - public String partName; + private final boolean isPartitioned; + private final StructObjectInspector tblRawRowObjectInspector; // without partition + private final StructObjectInspector partObjectInspector; // partition + private StructObjectInspector rowObjectInspector; + private final Converter partTblObjectInspectorConverter; + private final Object[] rowWithPart; + private Object[] rowWithPartAndVC; + private final Deserializer deserializer; + private String tableName; + private String partName; /** * @param isPartitioned @@ -158,18 +164,20 @@ */ public MapOpCtx(boolean isPartitioned, StructObjectInspector rowObjectInspector, - StructObjectInspector rawRowObjectInspector, + StructObjectInspector tblRawRowObjectInspector, StructObjectInspector partObjectInspector, Object[] rowWithPart, Object[] rowWithPartAndVC, - Deserializer deserializer) { + Deserializer deserializer, + Converter partTblObjectInspectorConverter) { this.isPartitioned = isPartitioned; this.rowObjectInspector = rowObjectInspector; - this.rawRowObjectInspector = rawRowObjectInspector; + this.tblRawRowObjectInspector = tblRawRowObjectInspector; this.partObjectInspector = partObjectInspector; this.rowWithPart = rowWithPart; this.rowWithPartAndVC = rowWithPartAndVC; this.deserializer = deserializer; + this.partTblObjectInspectorConverter = partTblObjectInspectorConverter; } /** @@ -186,6 +194,10 @@ return rowObjectInspector; } + public StructObjectInspector getTblRawRowObjectInspector() { + return tblRawRowObjectInspector; + } + /** * @return the rowWithPart */ @@ -206,6 +218,10 @@ public Deserializer getDeserializer() { return deserializer; } + + public Converter getPartTblObjectInspectorConverter() { + return partTblObjectInspectorConverter; + } } /** @@ -225,38 +241,49 @@ } private MapOpCtx initObjectInspector(MapredWork conf, - Configuration hconf, String onefile) throws HiveException, + Configuration hconf, String onefile, boolean identityConverterOK) throws HiveException, ClassNotFoundException, InstantiationException, IllegalAccessException, SerDeException { - PartitionDesc td = conf.getPathToPartitionInfo().get(onefile); - LinkedHashMap partSpec = td.getPartSpec(); - Properties tblProps = td.getProperties(); + PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile); + LinkedHashMap partSpec = pd.getPartSpec(); + Properties partProps = pd.getProperties(); + Properties tblProps = pd.getTableDesc().getProperties(); - Class sdclass = td.getDeserializerClass(); + 
Class sdclass = pd.getDeserializerClass(); if (sdclass == null) { - String className = td.getSerdeClassName(); + String className = pd.getSerdeClassName(); if ((className == "") || (className == null)) { throw new HiveException( "SerDe class or the SerDe class name is not set for table: " - + td.getProperties().getProperty("name")); + + pd.getProperties().getProperty("name")); } sdclass = hconf.getClassByName(className); } - String tableName = String.valueOf(tblProps.getProperty("name")); + String tableName = String.valueOf(partProps.getProperty("name")); String partName = String.valueOf(partSpec); - // HiveConf.setVar(hconf, HiveConf.ConfVars.HIVETABLENAME, tableName); - // HiveConf.setVar(hconf, HiveConf.ConfVars.HIVEPARTITIONNAME, partName); - Deserializer deserializer = (Deserializer) sdclass.newInstance(); - deserializer.initialize(hconf, tblProps); - StructObjectInspector rawRowObjectInspector = (StructObjectInspector) deserializer + Deserializer partDeserializer = (Deserializer) sdclass.newInstance(); + partDeserializer.initialize(hconf, partProps); + StructObjectInspector partRawRowObjectInspector = (StructObjectInspector) partDeserializer .getObjectInspector(); + Deserializer tblDeserializer = (Deserializer) sdclass.newInstance(); + tblDeserializer.initialize(hconf, tblProps); + StructObjectInspector tblRawRowObjectInspector = (StructObjectInspector) tblDeserializer + .getObjectInspector(); + + ObjectPair converterOutputOI = + ObjectInspectorConverters.getConverterOutputOI(partRawRowObjectInspector, + tblRawRowObjectInspector, identityConverterOK); + + tblRawRowObjectInspector = (StructObjectInspector)converterOutputOI.getFirst(); + partTblObjectInspectorConverter = converterOutputOI.getSecond(); + MapOpCtx opCtx = null; // Next check if this table has partitions and if so // get the list of partition names as well as allocate // the serdes for the partition columns - String pcols = tblProps + String pcols = partProps .getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); // Log LOG = LogFactory.getLog(MapOperator.class.getName()); if (pcols != null && pcols.length() > 0) { @@ -285,16 +312,16 @@ rowWithPart[1] = partValues; StructObjectInspector rowObjectInspector = ObjectInspectorFactory .getUnionStructObjectInspector(Arrays - .asList(new StructObjectInspector[] {rawRowObjectInspector, partObjectInspector})); + .asList(new StructObjectInspector[] {tblRawRowObjectInspector, partObjectInspector})); // LOG.info("dump " + tableName + " " + partName + " " + // rowObjectInspector.getTypeName()); - opCtx = new MapOpCtx(true, rowObjectInspector, rawRowObjectInspector, partObjectInspector, - rowWithPart, null, deserializer); + opCtx = new MapOpCtx(true, rowObjectInspector, tblRawRowObjectInspector, partObjectInspector, + rowWithPart, null, partDeserializer, partTblObjectInspectorConverter); } else { // LOG.info("dump2 " + tableName + " " + partName + " " + // rowObjectInspector.getTypeName()); - opCtx = new MapOpCtx(false, rawRowObjectInspector, rawRowObjectInspector, null, null, - null, deserializer); + opCtx = new MapOpCtx(false, tblRawRowObjectInspector, tblRawRowObjectInspector, null, null, + null, partDeserializer, partTblObjectInspectorConverter); } opCtx.tableName = tableName; opCtx.partName = partName; @@ -312,7 +339,8 @@ isPartitioned = opCtxMap.get(inp).isPartitioned(); rowWithPart = opCtxMap.get(inp).getRowWithPart(); rowWithPartAndVC = opCtxMap.get(inp).getRowWithPartAndVC(); - rowObjectInspector = 
opCtxMap.get(inp).getRowObjectInspector(); + tblRowObjectInspector = opCtxMap.get(inp).getRowObjectInspector(); + partTblObjectInspectorConverter = opCtxMap.get(inp).getPartTblObjectInspectorConverter(); if (listInputPaths.contains(inp)) { return; } @@ -320,7 +348,8 @@ listInputPaths.add(inp); if (op instanceof TableScanOperator) { - StructObjectInspector rawRowObjectInspector = opCtxMap.get(inp).rawRowObjectInspector; + StructObjectInspector tblRawRowObjectInspector = + opCtxMap.get(inp).getTblRawRowObjectInspector(); StructObjectInspector partObjectInspector = opCtxMap.get(inp).partObjectInspector; TableScanOperator tsOp = (TableScanOperator) op; TableScanDesc tsDesc = tsOp.getConf(); @@ -348,22 +377,61 @@ this.rowWithPartAndVC = new Object[2]; } if (partObjectInspector == null) { - this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays + this.tblRowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays .asList(new StructObjectInspector[] { - rowObjectInspector, vcStructObjectInspector})); + tblRowObjectInspector, vcStructObjectInspector})); } else { - this.rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays + this.tblRowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays .asList(new StructObjectInspector[] { - rawRowObjectInspector, partObjectInspector, + tblRawRowObjectInspector, partObjectInspector, vcStructObjectInspector})); } - opCtxMap.get(inp).rowObjectInspector = this.rowObjectInspector; + opCtxMap.get(inp).rowObjectInspector = this.tblRowObjectInspector; opCtxMap.get(inp).rowWithPartAndVC = this.rowWithPartAndVC; } } } } + private boolean isIdentityConverterOK(Configuration hconf) throws HiveException { + try + { + for (String onefile : conf.getPathToAliases().keySet()) { + PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile); + Properties partProps = pd.getProperties(); + Properties tblProps = pd.getTableDesc().getProperties(); + + Class sdclass = pd.getDeserializerClass(); + if (sdclass == null) { + String className = pd.getSerdeClassName(); + if ((className == "") || (className == null)) { + throw new HiveException( + "SerDe class or the SerDe class name is not set for table: " + + pd.getProperties().getProperty("name")); + } + sdclass = hconf.getClassByName(className); + } + + Deserializer partDeserializer = (Deserializer) sdclass.newInstance(); + partDeserializer.initialize(hconf, partProps); + StructObjectInspector partRawRowObjectInspector = (StructObjectInspector) partDeserializer + .getObjectInspector(); + + Deserializer tblDeserializer = (Deserializer) sdclass.newInstance(); + tblDeserializer.initialize(hconf, tblProps); + StructObjectInspector tblRawRowObjectInspector = (StructObjectInspector) tblDeserializer + .getObjectInspector(); + + if (partRawRowObjectInspector != tblRawRowObjectInspector) { + return false; + } + } + } catch (Exception e) { + throw new HiveException(e); + } + return true; + } + public void setChildren(Configuration hconf) throws HiveException { Path fpath = new Path((new Path(HiveConf.getVar(hconf, @@ -376,9 +444,11 @@ statsMap.put(Counter.DESERIALIZE_ERRORS, deserialize_error_count); + boolean identityConverterOK = isIdentityConverterOK(hconf); + try { for (String onefile : conf.getPathToAliases().keySet()) { - MapOpCtx opCtx = initObjectInspector(conf, hconf, onefile); + MapOpCtx opCtx = initObjectInspector(conf, hconf, onefile, identityConverterOK); Path onepath = new Path(new Path(onefile).toUri().getPath()); List aliases = 
conf.getPathToAliases().get(onefile); @@ -514,16 +584,18 @@ Object row = null; try { if (this.hasVC) { - this.rowWithPartAndVC[0] = deserializer.deserialize(value); + this.rowWithPartAndVC[0] = + partTblObjectInspectorConverter.convert(deserializer.deserialize(value)); int vcPos = isPartitioned ? 2 : 1; if (context != null) { populateVirtualColumnValues(context, vcs, vcValues, deserializer); } this.rowWithPartAndVC[vcPos] = this.vcValues; } else if (!isPartitioned) { - row = deserializer.deserialize((Writable) value); + row = partTblObjectInspectorConverter.convert(deserializer.deserialize((Writable) value)); } else { - rowWithPart[0] = deserializer.deserialize((Writable) value); + rowWithPart[0] = + partTblObjectInspectorConverter.convert(deserializer.deserialize((Writable) value)); } } catch (Exception e) { // Serialize the row and output. @@ -542,22 +614,22 @@ try { if (this.hasVC) { - forward(this.rowWithPartAndVC, this.rowObjectInspector); + forward(this.rowWithPartAndVC, this.tblRowObjectInspector); } else if (!isPartitioned) { - forward(row, rowObjectInspector); + forward(row, tblRowObjectInspector); } else { - forward(rowWithPart, rowObjectInspector); + forward(rowWithPart, tblRowObjectInspector); } } catch (Exception e) { // Serialize the row and output the error message. String rowString; try { if (this.hasVC) { - rowString = SerDeUtils.getJSONString(rowWithPartAndVC, rowObjectInspector); + rowString = SerDeUtils.getJSONString(rowWithPartAndVC, tblRowObjectInspector); } else if (!isPartitioned) { - rowString = SerDeUtils.getJSONString(row, rowObjectInspector); + rowString = SerDeUtils.getJSONString(row, tblRowObjectInspector); } else { - rowString = SerDeUtils.getJSONString(rowWithPart, rowObjectInspector); + rowString = SerDeUtils.getJSONString(rowWithPart, tblRowObjectInspector); } } catch (Exception e2) { rowString = "[Error getting row data with exception " + Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (working copy) @@ -109,8 +109,6 @@ dummyOp.setExecContext(execContext); dummyOp.initialize(jc,null); } - - } catch (Throwable e) { abort = true; if (e instanceof OutOfMemoryError) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (working copy) @@ -50,6 +50,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.DelegatedObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; @@ -92,6 +94,9 @@ private transient Writable value; private transient Writable[] vcValues; private transient Deserializer serde; + private transient Deserializer tblSerde; + Converter partTblObjectInspectorConverter; 
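
Another editorial illustration (not part of the patch): FetchOperator now keeps the partition serde and the table serde side by side, and the partTblObjectInspectorConverter field declared here bridges the two, so the rows and the ObjectInspector it reports always follow the table schema. The sketch below shows the added-column case from partition_wise_fileformat10: a partition written with (key, value) is read under a table schema that has since gained value2. The class name AddedColumnSketch and the row values are invented; the sketch assumes the struct converter pads columns that exist only in the table schema with NULL, which is exactly what the "238 val_238 NULL 1" expectations in the .q.out files above require of a patched (or any later) Hive build.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class AddedColumnSketch {
  public static void main(String[] args) {
    ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;

    // The partition was written before "value2" existed.
    StructObjectInspector partOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value"),
        Arrays.asList(stringOI, stringOI));

    // Table schema after "alter table prt add columns (value2 string)".
    StructObjectInspector tblOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("key", "value", "value2"),
        Arrays.asList(stringOI, stringOI, stringOI));

    Converter partToTbl = ObjectInspectorConverters.getConverter(partOI, tblOI);

    List<Object> partRow = new ArrayList<Object>(Arrays.<Object>asList("238", "val_238"));
    // Expected to print [238, val_238, null]: the column that exists only in the
    // table schema comes back as NULL, matching the expected query output above.
    System.out.println(tblOI.getStructFieldsDataAsList(partToTbl.convert(partRow)));
  }
}

The same conversion is applied per row in getNextRow further down, where serde.deserialize(value) is wrapped by partTblObjectInspectorConverter.convert(...) and the table serde's ObjectInspector is the one exposed to callers.
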
+ private transient Iterator iterPath; private transient Iterator iterPartDesc; private transient Path currPath; @@ -223,17 +228,26 @@ private StructObjectInspector setTableDesc(TableDesc table) throws Exception { Deserializer serde = table.getDeserializerClass().newInstance(); serde.initialize(job, table.getProperties()); - return createRowInspector(getCurrent(serde)); + return createRowInspector(getCurrent(serde.getObjectInspector())); } private StructObjectInspector setPrtnDesc(PartitionDesc partition) throws Exception { Deserializer serde = partition.getDeserializerClass().newInstance(); serde.initialize(job, partition.getProperties()); + + Deserializer tblSerde = partition.getTableDesc().getDeserializerClass().newInstance(); + tblSerde.initialize(job, partition.getTableDesc().getProperties()); + String pcols = partition.getTableDesc().getProperties().getProperty( org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); String[] partKeys = pcols.trim().split("/"); row[1] = createPartValue(partKeys, partition.getPartSpec()); - return createRowInspector(getCurrent(serde), partKeys); + + ObjectInspector outputOI = ObjectInspectorConverters.getConverterOutputOI( + serde.getObjectInspector(), + tblSerde.getObjectInspector(), true).getFirst(); + + return createRowInspector(getCurrent(outputOI), partKeys); } private StructObjectInspector setPrtnDesc(TableDesc table) throws Exception { @@ -243,11 +257,10 @@ org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); String[] partKeys = pcols.trim().split("/"); row[1] = null; - return createRowInspector(getCurrent(serde), partKeys); + return createRowInspector(getCurrent(serde.getObjectInspector()), partKeys); } - private StructObjectInspector getCurrent(Deserializer serde) throws SerDeException { - ObjectInspector current = serde.getObjectInspector(); + private StructObjectInspector getCurrent(ObjectInspector current) throws SerDeException { if (objectInspector != null) { current = DelegatedObjectInspectorFactory.reset(objectInspector, current); } else { @@ -384,6 +397,18 @@ serde = tmp.getDeserializerClass().newInstance(); serde.initialize(job, tmp.getProperties()); + if (currTbl != null) { + tblSerde = serde; + } + else { + tblSerde = currPart.getTableDesc().getDeserializerClass().newInstance(); + tblSerde.initialize(job, currPart.getTableDesc().getProperties()); + } + + partTblObjectInspectorConverter = ObjectInspectorConverters.getConverterOutputOI( + serde.getObjectInspector(), + tblSerde.getObjectInspector(), true).getSecond(); + if (LOG.isDebugEnabled()) { LOG.debug("Creating fetchTask with deserializer typeinfo: " + serde.getObjectInspector().getTypeName()); @@ -503,14 +528,15 @@ vcValues = MapOperator.populateVirtualColumnValues(context, vcCols, vcValues, serde); row[isPartitioned ? 
2 : 1] = vcValues; } - row[0] = serde.deserialize(value); + row[0] = partTblObjectInspectorConverter.convert(serde.deserialize(value)); + if (hasVC || isPartitioned) { inspectable.o = row; inspectable.oi = rowObjectInspector; return inspectable; } inspectable.o = row[0]; - inspectable.oi = serde.getObjectInspector(); + inspectable.oi = tblSerde.getObjectInspector(); return inspectable; } else { currRecReader.close(); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (working copy) @@ -30,6 +30,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -40,7 +41,6 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; -import org.apache.hadoop.hive.ql.util.ObjectPair; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; Index: ql/src/java/org/apache/hadoop/hive/ql/util/ObjectPair.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/ObjectPair.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/util/ObjectPair.java (working copy) @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.util; - -public class ObjectPair { - private F first; - private S second; - - public ObjectPair() {} - - public ObjectPair(F first, S second) { - this.first = first; - this.second = second; - } - - public F getFirst() { - return first; - } - - public void setFirst(F first) { - this.first = first; - } - - public S getSecond() { - return second; - } - - public void setSecond(S second) { - this.second = second; - } -} Index: ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java (working copy) @@ -87,7 +87,7 @@ public PartitionDesc(final org.apache.hadoop.hive.ql.metadata.Partition part) throws HiveException { tableDesc = Utilities.getTableDesc(part.getTable()); - properties = part.getSchema(); + properties = part.getSchemaFromPartitionSchema(); partSpec = part.getSpec(); deserializerClass = part.getDeserializer(properties).getClass(); inputFileFormatClass = part.getInputFormatClass(); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -41,6 +41,7 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.TableType; @@ -165,7 +166,6 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; -import org.apache.hadoop.hive.ql.util.ObjectPair; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; Index: ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (working copy) @@ -226,22 +226,27 @@ private static class CombinePathInputFormat { private final List> opList; private final String inputFormatClassName; + private final String deserializerClassName; public CombinePathInputFormat(List> opList, - String inputFormatClassName) { + String inputFormatClassName, + String deserializerClassName) { this.opList = opList; this.inputFormatClassName = inputFormatClassName; + this.deserializerClassName = deserializerClassName; } @Override public boolean equals(Object o) { if (o instanceof CombinePathInputFormat) { - CombinePathInputFormat mObj = (CombinePathInputFormat)o; + CombinePathInputFormat mObj = (CombinePathInputFormat) o; if (mObj == null) { return false; } - return opList.equals(mObj.opList) && - inputFormatClassName.equals(mObj.inputFormatClassName); + return (opList.equals(mObj.opList)) && + (inputFormatClassName.equals(mObj.inputFormatClassName)) && + (deserializerClassName == null ? 
(mObj.deserializerClassName == null) : + deserializerClassName.equals(mObj.deserializerClassName)); } return false; } @@ -296,6 +301,8 @@ Class inputFormatClass = part.getInputFileFormatClass(); String inputFormatClassName = inputFormatClass.getName(); InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job); + String deserializerClassName = part.getDeserializerClass() == null ? null + : part.getDeserializerClass().getName(); // Since there is no easy way of knowing whether MAPREDUCE-1597 is present in the tree or not, // we use a configuration variable for the same @@ -342,12 +349,24 @@ // Does a pool exist for this path already CombineFilter f = null; List> opList = null; - boolean done = false; if (!mrwork.isMapperCannotSpanPartns()) { opList = HiveFileFormatUtils.doGetWorksFromPath( pathToAliases, aliasToWork, filterPath); - f = poolMap.get(new CombinePathInputFormat(opList, inputFormatClassName)); + CombinePathInputFormat combinePathInputFormat = + new CombinePathInputFormat(opList, inputFormatClassName, deserializerClassName); + f = poolMap.get(combinePathInputFormat); + if (f == null) { + f = new CombineFilter(filterPath); + LOG.info("CombineHiveInputSplit creating pool for " + path + + "; using filter path " + filterPath); + combine.createPool(job, f); + poolMap.put(combinePathInputFormat, f); + } else { + LOG.info("CombineHiveInputSplit: pool is already created for " + path + + "; using filter path " + filterPath); + f.addPath(filterPath); + } } else { // In the case of tablesample, the input paths are pointing to files rather than directories. // We need to get the parent directory as the filtering path so that all files in the same @@ -361,24 +380,7 @@ } else { inpDirs.add(path); } - done = true; } - - if (!done) { - if (f == null) { - f = new CombineFilter(filterPath); - LOG.info("CombineHiveInputSplit creating pool for " + path + - "; using filter path " + filterPath); - combine.createPool(job, f); - if (!mrwork.isMapperCannotSpanPartns()) { - poolMap.put(new CombinePathInputFormat(opList, inputFormatClassName), f); - } - } else { - LOG.info("CombineHiveInputSplit: pool is already created for " + path + - "; using filter path " + filterPath); - f.addPath(filterPath); - } - } } // Processing directories Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (revision 1426393) +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (working copy) @@ -405,7 +405,7 @@ } /** - * Get the list of operatators from the opeerator tree that are needed for the path + * Get the list of operators from the operator tree that are needed for the path * @param pathToAliases mapping from path to aliases * @param aliasToWork The operator tree to be invoked for a given alias * @param dir The path to look for