diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java deleted file mode 100644 index aaf4eb4..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import java.io.IOException; -import java.util.List; - -/** - * Factory for creating ORC tree readers. These tree readers can handle type promotions and type - * conversions. - */ -public class ConversionTreeReaderFactory extends TreeReaderFactory { - - // TODO: This is currently only a place holder for type conversions. - - public static TreeReader createTreeReader(int columnId, - List types, - boolean[] included, - boolean skipCorrupt - ) throws IOException { - return TreeReaderFactory.createTreeReader(columnId, types, included, skipCorrupt); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java index ab539c4..b825104 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java @@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.DiskRange; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.io.FileFormatException; import org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterVersion; import org.apache.hadoop.hive.ql.io.orc.OrcProto.Footer; @@ -41,8 +42,21 @@ import org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem; import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl.BufferChunk; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.Text; import com.google.common.collect.Lists; @@ -72,6 +86,12 @@ private final List versionList; private final OrcFile.WriterVersion writerVersion; + private final List fileTypes; + + private final List schemaEvolutionTypes; + private final int schemaEvolutionStructSubtype; + private final int schemaEvolutionStructOrigColumns; + //serialized footer - Keeping this around for use by getFileMetaInfo() // will help avoid cpu cycles spend in deserializing at cost of increased // memory footprint. @@ -186,7 +206,7 @@ public long getContentLength() { @Override public List getTypes() { - return footer.getTypesList(); + return schemaEvolutionTypes; } @Override @@ -332,9 +352,19 @@ public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException { this.metadataSize = rInfo.metadataSize; this.metadata = rInfo.metadata; this.footer = rInfo.footer; - this.inspector = rInfo.inspector; this.versionList = footerMetaData.versionList; this.writerVersion = footerMetaData.writerVersion; + + this.fileTypes = this.footer.getTypesList(); + + SchemaEvolution schemaEvolution = + new SchemaEvolution(this.fileTypes); + + this.schemaEvolutionTypes = schemaEvolution.schemaEvolutionTypes; + this.schemaEvolutionStructSubtype = schemaEvolution.schemaEvolutionStructSubtype; + this.schemaEvolutionStructOrigColumns = schemaEvolution.schemaEvolutionStructOrigColumns; + + this.inspector = OrcStruct.createObjectInspector(0, schemaEvolutionTypes); } /** @@ -557,9 +587,8 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, final int metadataSize; final OrcProto.Metadata metadata; final OrcProto.Footer footer; - final ObjectInspector inspector; - MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize, + MetaInfoObjExtractor(String codecStr, int bufferSize, int metadataSize, ByteBuffer footerBuffer) throws IOException { this.compressionKind = CompressionKind.valueOf(codecStr); @@ -575,7 +604,6 @@ private static FileMetaInfo extractMetaInfoFromFooter(FileSystem fs, footerBuffer, position + metadataSize, footerBufferSize, codec, bufferSize); footerBuffer.position(position); - this.inspector = OrcStruct.createObjectInspector(0, footer.getTypesList()); } } @@ -672,8 +700,8 @@ public RecordReader rowsOptions(Options options) throws IOException { options.include(include); } return new RecordReaderImpl(this.getStripes(), fileSystem, path, - options, footer.getTypesList(), codec, bufferSize, - footer.getRowIndexStride(), conf); + options, fileTypes, schemaEvolutionTypes, schemaEvolutionStructSubtype, + schemaEvolutionStructOrigColumns, codec, bufferSize, footer.getRowIndexStride(), conf); } @@ -853,4 +881,334 @@ public MetadataReader metadata() throws IOException { public Footer getFooter() { return footer; } + + static List getAcidEventFields() { + return Lists.newArrayList("operation", "originalTransaction", "bucket", + "rowId", "currentTransaction", "row"); + } + + private class SchemaEvolution { + + final List schemaEvolutionTypes; + final int schemaEvolutionStructSubtype; + final int schemaEvolutionStructOrigColumns; + + public SchemaEvolution(List fileTypes) throws IOException { + + List types = null; + int lastSubtype = -1; + int structSubtype = -1; + int structOrigColumns = -1; + + if (!fileTypes.get(0).getKind().equals(OrcProto.Type.Kind.STRUCT)) { + + // No schema evolution for non-STRUCT. 
+ types = fileTypes; + + } else { + final StructObjectInspector desiredObjectInspector = getDesiredRowObjectInspector(conf); + if (desiredObjectInspector == null) { + + // No desired schema found in configuration properties. + types = fileTypes; + + } else { + + // For ACID, the row is the ROW field in the outer STRUCT. + final boolean isAcid = checkAcidSchema(fileTypes); + final List rowSchema; + int rowSubtype; + if (isAcid) { + rowSubtype = OrcRecordUpdater.ROW + 1; + rowSchema = fileTypes.subList(rowSubtype, fileTypes.size()); + } else { + rowSubtype = 0; + rowSchema = fileTypes; + } + + // Do checking on the overlap. Additional columns will be defaulted to NULL. + final List desiredRowSchema = getOrcTypes(desiredObjectInspector); + + int numOrigColumns = rowSchema.get(0).getSubtypesCount(); + int numNewColumns = desiredRowSchema.get(0).getSubtypesCount(); + int numOverlapColumns = Math.min(numOrigColumns, numNewColumns); + + // Check type promotion. ORC can only support type promotions for integer types + // short -> int -> bigint as same integer readers are used for the above types. + + for (int i = 0; i < numOverlapColumns; i++) { + OrcProto.Type fColType = fileTypes.get(rowSubtype + i); + OrcProto.Type rColType = desiredRowSchema.get(i); + if (!fColType.getKind().equals(rColType.getKind())) { + + boolean ok = false; + if (fColType.getKind().equals(OrcProto.Type.Kind.SHORT)) { + + if (rColType.getKind().equals(OrcProto.Type.Kind.INT) || + rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { + // type promotion possible, converting SHORT to INT/LONG requested type + ok = true; + } + } else if (fColType.getKind().equals(OrcProto.Type.Kind.INT)) { + + if (rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { + // type promotion possible, converting INT to LONG requested type + ok = true; + } + } + + if (!ok) { + throw new IOException("ORC does not support type conversion from " + + fColType.getKind().name() + " to " + rColType.getKind().name()); + } + } + } + + types = new ArrayList(); + + if (isAcid) { + + // This copies the ACID struct type which is subtype = 0. + // It has field names "operation" through "row". + // And we copy the types for all fields EXCEPT ROW (which must be last!). + + for (int i = 0; i < rowSubtype; i++) { + types.add(fileTypes.get(i).toBuilder().build()); + } + } + // Add the row struct type. 
+ getOrcTypesAppend(types, desiredObjectInspector); + + structSubtype = rowSubtype; + structOrigColumns = numOrigColumns; + } + } + schemaEvolutionTypes = types; + schemaEvolutionStructSubtype = structSubtype; + schemaEvolutionStructOrigColumns = structOrigColumns; + } + + private boolean checkAcidSchema(List fileSchema) { + if (fileSchema.get(0).getKind().equals(OrcProto.Type.Kind.STRUCT)) { + List acidFields = getAcidEventFields(); + List rootFields = fileSchema.get(0).getFieldNamesList(); + if (acidFields.equals(rootFields)) { + return true; + } + } + return false; + } + + private StructObjectInspector getDesiredRowObjectInspector(Configuration conf) { + + String metaTableColumnProperty = conf.get(hive_metastoreConstants.META_TABLE_COLUMNS); + String metaTableTypeProperty = conf.get(hive_metastoreConstants.META_TABLE_COLUMN_TYPES); + if (metaTableColumnProperty == null || metaTableTypeProperty == null) { + return null; + } + + ArrayList metaTableColumnNames = Lists.newArrayList(metaTableColumnProperty.split(",")); + if (metaTableColumnNames.size() == 0) { + return null; + } + ArrayList metaTableTypes = TypeInfoUtils.getTypeInfosFromTypeString(metaTableTypeProperty); + if (metaTableTypes.size() != metaTableColumnNames.size()) { + return null; + } + + // Find first virtual column and clip them off. + int virtualColumnClipNum = -1; + + int columnNum = 0; + for (String metaTableColumn : metaTableColumnNames) { + if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(metaTableColumn)) { + virtualColumnClipNum = columnNum; + break; + } + columnNum++; + } + + String partitionColumnProperty = conf.get(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); + ArrayList partitionColumnNames = null; + int partitionColumnNameCount = 0; + if (partitionColumnProperty != null) { + partitionColumnNames = Lists.newArrayList(partitionColumnProperty.split(",")); + partitionColumnNames = (ArrayList) VirtualColumn.removeVirtualColumns(partitionColumnNames); + partitionColumnNameCount = partitionColumnNames.size(); + } + + if (virtualColumnClipNum != -1 || partitionColumnNameCount > 0) { + + // Clip off virtual column names and partition names. + int columnClipNum = (virtualColumnClipNum != -1 ? + virtualColumnClipNum - partitionColumnNameCount : + metaTableColumnNames.size() - partitionColumnNameCount); + + if (columnClipNum < metaTableColumnNames.size()) { + metaTableColumnNames = Lists.newArrayList(metaTableColumnNames.subList(0, columnClipNum)); + metaTableTypes = Lists.newArrayList(metaTableTypes.subList(0, columnClipNum)); + } + } + + // Does not include virtual columns or partition columns. 
+ StructTypeInfo structTypeInfo = new StructTypeInfo(); + structTypeInfo.setAllStructFieldNames(metaTableColumnNames); + structTypeInfo.setAllStructFieldTypeInfos(metaTableTypes); + return (StructObjectInspector) new OrcStruct.OrcStructInspector(structTypeInfo); + } + + private List getDesiredRowSchema(Configuration conf) { + StructObjectInspector desiredObjectInspector = + getDesiredRowObjectInspector(conf); + if (desiredObjectInspector == null) { + return null; + } + return getOrcTypes(desiredObjectInspector); + } + + private List getOrcTypes(ObjectInspector inspector) { + List result = Lists.newArrayList(); + getOrcTypesAppend(result, inspector); + return result; + } + + private void getOrcTypesAppend(List result, ObjectInspector inspector) { + int subtype = result.size(); + OrcProto.Type.Builder type = OrcProto.Type.newBuilder(); + switch (inspector.getCategory()) { + case PRIMITIVE: + switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) { + case BOOLEAN: + type.setKind(OrcProto.Type.Kind.BOOLEAN); + break; + case BYTE: + type.setKind(OrcProto.Type.Kind.BYTE); + break; + case SHORT: + type.setKind(OrcProto.Type.Kind.SHORT); + break; + case INT: + type.setKind(OrcProto.Type.Kind.INT); + break; + case LONG: + type.setKind(OrcProto.Type.Kind.LONG); + break; + case FLOAT: + type.setKind(OrcProto.Type.Kind.FLOAT); + break; + case DOUBLE: + type.setKind(OrcProto.Type.Kind.DOUBLE); + break; + case STRING: + type.setKind(OrcProto.Type.Kind.STRING); + break; + case CHAR: + // The char length needs to be written to file and should be available + // from the object inspector + CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) inspector) + .getTypeInfo(); + type.setKind(OrcProto.Type.Kind.CHAR); + type.setMaximumLength(charTypeInfo.getLength()); + break; + case VARCHAR: + // The varchar length needs to be written to file and should be available + // from the object inspector + VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) inspector) + .getTypeInfo(); + type.setKind(OrcProto.Type.Kind.VARCHAR); + type.setMaximumLength(typeInfo.getLength()); + break; + case BINARY: + type.setKind(OrcProto.Type.Kind.BINARY); + break; + case TIMESTAMP: + type.setKind(OrcProto.Type.Kind.TIMESTAMP); + break; + case DATE: + type.setKind(OrcProto.Type.Kind.DATE); + break; + case DECIMAL: + DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) ((PrimitiveObjectInspector) inspector) + .getTypeInfo(); + type.setKind(OrcProto.Type.Kind.DECIMAL); + type.setPrecision(decTypeInfo.precision()); + type.setScale(decTypeInfo.scale()); + break; + default: + throw new IllegalArgumentException("Unknown primitive category: " + + ((PrimitiveObjectInspector) inspector).getPrimitiveCategory()); + } + result.add(type.build()); + break; + case LIST: + type.setKind(OrcProto.Type.Kind.LIST); + type.addSubtypes(++subtype); + result.add(type.build()); + getOrcTypesAppend(result, ((ListObjectInspector) inspector).getListElementObjectInspector()); + break; + case MAP: + { + // Make room for MAP type. + result.add(null); + + // Add MAP type pair in order to determine their subtype values. 
+ getOrcTypesAppend(result, ((MapObjectInspector) inspector).getMapKeyObjectInspector()); + int subtype2 = result.size(); + getOrcTypesAppend(result, ((MapObjectInspector) inspector).getMapValueObjectInspector()); + type.setKind(OrcProto.Type.Kind.MAP); + type.addSubtypes(subtype + 1); + type.addSubtypes(subtype2); + result.set(subtype, type.build()); + } + break; + case STRUCT: + { + List fields = + ((StructObjectInspector) inspector).getAllStructFieldRefs(); + + // Make room for STRUCT type. + result.add(null); + + List fieldSubtypes = new ArrayList(fields.size()); + for (int i = 0 ; i < fields.size(); i++) { + int fieldSubtype = result.size(); + fieldSubtypes.add(fieldSubtype); + getOrcTypesAppend(result, fields.get(i).getFieldObjectInspector()); + } + + type.setKind(OrcProto.Type.Kind.STRUCT); + for (int i = 0 ; i < fields.size(); i++) { + type.addSubtypes(fieldSubtypes.get(i)); + type.addFieldNames(fields.get(i).getFieldName()); + } + result.set(subtype, type.build()); + } + break; + case UNION: + { + List unionInspectors = + ((UnionObjectInspector) inspector).getObjectInspectors(); + + // Make room for UNION type. + result.add(null); + + List unionSubtypes = new ArrayList(unionInspectors.size()); + for (int i = 0 ; i < unionInspectors.size(); i++) { + int unionSubtype = result.size(); + unionSubtypes.add(unionSubtype); + getOrcTypesAppend(result, unionInspectors.get(i)); + } + + type.setKind(OrcProto.Type.Kind.UNION); + for (int i = 0 ; i < unionInspectors.size(); i++) { + type.addSubtypes(unionSubtypes.get(i)); + } + result.set(subtype, type.build()); + } + break; + default: + throw new IllegalArgumentException("Unknown category: " + inspector.getCategory()); + } + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java deleted file mode 100644 index 23a9af4..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java +++ /dev/null @@ -1,274 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0 - *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; - -import com.google.common.collect.Lists; - -/** - * Factory to create ORC tree readers. It also compares file schema with schema specified on read - * to see if type promotions are possible. - */ -public class RecordReaderFactory { - static final Log LOG = LogFactory.getLog(RecordReaderFactory.class); - private static final boolean isLogInfoEnabled = LOG.isInfoEnabled(); - - public static TreeReaderFactory.TreeReader createTreeReader(int colId, - Configuration conf, - List fileSchema, - boolean[] included, - boolean skipCorrupt) throws IOException { - final boolean isAcid = checkAcidSchema(fileSchema); - final List originalFileSchema; - if (isAcid) { - originalFileSchema = fileSchema.subList(fileSchema.get(0).getSubtypesCount(), - fileSchema.size()); - } else { - originalFileSchema = fileSchema; - } - final int numCols = originalFileSchema.get(0).getSubtypesCount(); - List schemaOnRead = getSchemaOnRead(numCols, conf); - List schemaUsed = getMatchingSchema(fileSchema, schemaOnRead); - if (schemaUsed == null) { - return TreeReaderFactory.createTreeReader(colId, fileSchema, included, skipCorrupt); - } else { - return ConversionTreeReaderFactory.createTreeReader(colId, schemaUsed, included, skipCorrupt); - } - } - - static List getAcidEventFields() { - return Lists.newArrayList("operation", "originalTransaction", "bucket", - "rowId", "currentTransaction", "row"); - } - - private static boolean checkAcidSchema(List fileSchema) { - if (fileSchema.get(0).getKind().equals(OrcProto.Type.Kind.STRUCT)) { - List acidFields = getAcidEventFields(); - List rootFields = fileSchema.get(0).getFieldNamesList(); - if (acidFields.equals(rootFields)) { - return true; - } - } - return false; - } - - private static List getMatchingSchema(List fileSchema, - List schemaOnRead) { - if (schemaOnRead == null) { - if (isLogInfoEnabled) { - LOG.info("Schema is not specified on read. 
Using file schema."); - } - return null; - } - - if (fileSchema.size() != schemaOnRead.size()) { - if (isLogInfoEnabled) { - LOG.info("Schema on read column count does not match file schema's column count." + - " Falling back to using file schema."); - } - return null; - } else { - List result = Lists.newArrayList(fileSchema); - // check type promotion. ORC can only support type promotions for integer types - // short -> int -> bigint as same integer readers are used for the above types. - boolean canPromoteType = false; - for (int i = 0; i < fileSchema.size(); i++) { - OrcProto.Type fColType = fileSchema.get(i); - OrcProto.Type rColType = schemaOnRead.get(i); - if (!fColType.getKind().equals(rColType.getKind())) { - - if (fColType.getKind().equals(OrcProto.Type.Kind.SHORT)) { - - if (rColType.getKind().equals(OrcProto.Type.Kind.INT) || - rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { - // type promotion possible, converting SHORT to INT/LONG requested type - result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build()); - canPromoteType = true; - } else { - canPromoteType = false; - } - - } else if (fColType.getKind().equals(OrcProto.Type.Kind.INT)) { - - if (rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { - // type promotion possible, converting INT to LONG requested type - result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build()); - canPromoteType = true; - } else { - canPromoteType = false; - } - - } else { - canPromoteType = false; - } - } - } - - if (canPromoteType) { - if (isLogInfoEnabled) { - LOG.info("Integer type promotion happened in ORC record reader. Using promoted schema."); - } - return result; - } - } - - return null; - } - - private static List getSchemaOnRead(int numCols, Configuration conf) { - String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES); - final String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS); - if (columnTypeProperty == null || columnNameProperty == null) { - return null; - } - - ArrayList columnNames = Lists.newArrayList(columnNameProperty.split(",")); - ArrayList fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - StructTypeInfo structTypeInfo = new StructTypeInfo(); - // Column types from conf includes virtual and partition columns at the end. We consider only - // the actual columns in the file. 
- structTypeInfo.setAllStructFieldNames(Lists.newArrayList(columnNames.subList(0, numCols))); - structTypeInfo.setAllStructFieldTypeInfos(Lists.newArrayList(fieldTypes.subList(0, numCols))); - ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(structTypeInfo); - return getOrcTypes(oi); - } - - private static List getOrcTypes(ObjectInspector inspector) { - List result = Lists.newArrayList(); - getOrcTypesImpl(result, inspector); - return result; - } - - private static void getOrcTypesImpl(List result, ObjectInspector inspector) { - OrcProto.Type.Builder type = OrcProto.Type.newBuilder(); - switch (inspector.getCategory()) { - case PRIMITIVE: - switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) { - case BOOLEAN: - type.setKind(OrcProto.Type.Kind.BOOLEAN); - break; - case BYTE: - type.setKind(OrcProto.Type.Kind.BYTE); - break; - case SHORT: - type.setKind(OrcProto.Type.Kind.SHORT); - break; - case INT: - type.setKind(OrcProto.Type.Kind.INT); - break; - case LONG: - type.setKind(OrcProto.Type.Kind.LONG); - break; - case FLOAT: - type.setKind(OrcProto.Type.Kind.FLOAT); - break; - case DOUBLE: - type.setKind(OrcProto.Type.Kind.DOUBLE); - break; - case STRING: - type.setKind(OrcProto.Type.Kind.STRING); - break; - case CHAR: - // The char length needs to be written to file and should be available - // from the object inspector - CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) inspector) - .getTypeInfo(); - type.setKind(OrcProto.Type.Kind.CHAR); - type.setMaximumLength(charTypeInfo.getLength()); - break; - case VARCHAR: - // The varchar length needs to be written to file and should be available - // from the object inspector - VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) inspector) - .getTypeInfo(); - type.setKind(OrcProto.Type.Kind.VARCHAR); - type.setMaximumLength(typeInfo.getLength()); - break; - case BINARY: - type.setKind(OrcProto.Type.Kind.BINARY); - break; - case TIMESTAMP: - type.setKind(OrcProto.Type.Kind.TIMESTAMP); - break; - case DATE: - type.setKind(OrcProto.Type.Kind.DATE); - break; - case DECIMAL: - DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) ((PrimitiveObjectInspector) inspector) - .getTypeInfo(); - type.setKind(OrcProto.Type.Kind.DECIMAL); - type.setPrecision(decTypeInfo.precision()); - type.setScale(decTypeInfo.scale()); - break; - default: - throw new IllegalArgumentException("Unknown primitive category: " + - ((PrimitiveObjectInspector) inspector).getPrimitiveCategory()); - } - result.add(type.build()); - break; - case LIST: - type.setKind(OrcProto.Type.Kind.LIST); - result.add(type.build()); - getOrcTypesImpl(result, ((ListObjectInspector) inspector).getListElementObjectInspector()); - break; - case MAP: - type.setKind(OrcProto.Type.Kind.MAP); - result.add(type.build()); - getOrcTypesImpl(result, ((MapObjectInspector) inspector).getMapKeyObjectInspector()); - getOrcTypesImpl(result, ((MapObjectInspector) inspector).getMapValueObjectInspector()); - break; - case STRUCT: - type.setKind(OrcProto.Type.Kind.STRUCT); - result.add(type.build()); - for (StructField field : ((StructObjectInspector) inspector).getAllStructFieldRefs()) { - getOrcTypesImpl(result, field.getFieldObjectInspector()); - } - break; - case UNION: - type.setKind(OrcProto.Type.Kind.UNION); - result.add(type.build()); - for (ObjectInspector oi : ((UnionObjectInspector) inspector).getObjectInspectors()) { - getOrcTypesImpl(result, oi); - } - break; - default: - throw new IllegalArgumentException("Unknown category: " 
+ inspector.getCategory()); - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index ba304ba..26faece 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -153,7 +153,10 @@ protected RecordReaderImpl(List stripes, FileSystem fileSystem, Path path, Reader.Options options, - List types, + List fileTypes, + List schemaEvolutionTypes, + int schemaEvolutionStructSubtype, + int schemaEvolutionStructOrigColumns, CompressionCodec codec, int bufferSize, long strideRate, @@ -162,7 +165,7 @@ protected RecordReaderImpl(List stripes, this.path = path; this.file = fileSystem.open(path); this.codec = codec; - this.types = types; + this.types = fileTypes; this.bufferSize = bufferSize; this.included = options.getInclude(); this.conf = conf; @@ -202,7 +205,11 @@ protected RecordReaderImpl(List stripes, skipCorrupt = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf); } - reader = RecordReaderFactory.createTreeReader(0, conf, types, included, skipCorrupt); + reader = + TreeReaderFactory.createTreeReader(0, fileTypes, schemaEvolutionTypes, + schemaEvolutionStructSubtype, schemaEvolutionStructOrigColumns, + included, skipCorrupt); + indexes = new OrcProto.RowIndex[types.size()]; bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()]; advanceToNextRow(reader, 0L, true); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java index 6d47532..3456edc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java @@ -1947,22 +1947,37 @@ public Object nextVector(Object previousVector, long batchSize) throws IOExcepti } protected static class StructTreeReader extends TreeReader { + private final int schemaEvolutionStructColumns; + private final boolean needsDefaultNulls; protected final TreeReader[] fields; private final String[] fieldNames; StructTreeReader(int columnId, - List types, + List fileTypes, + List schemaEvolutionTypes, + int schemaEvolutionSubtype, + int schemaEvolutionStructOrigColumns, boolean[] included, boolean skipCorrupt) throws IOException { super(columnId); - OrcProto.Type type = types.get(columnId); - int fieldCount = type.getFieldNamesCount(); + OrcProto.Type type = schemaEvolutionTypes.get(columnId); + schemaEvolutionStructColumns = type.getFieldNamesCount(); + needsDefaultNulls = + (columnId == schemaEvolutionSubtype && + schemaEvolutionStructOrigColumns < schemaEvolutionStructColumns); + int fieldCount; + if (needsDefaultNulls) { + fieldCount = schemaEvolutionStructOrigColumns; + } else { + fieldCount = schemaEvolutionStructColumns; + } this.fields = new TreeReader[fieldCount]; this.fieldNames = new String[fieldCount]; for (int i = 0; i < fieldCount; ++i) { int subtype = type.getSubtypes(i); if (included == null || included[subtype]) { - this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt); + this.fields[i] = createTreeReader(subtype, fileTypes, schemaEvolutionTypes, + schemaEvolutionSubtype, schemaEvolutionStructOrigColumns, included, skipCorrupt); } this.fieldNames[i] = type.getFieldNames(i); } @@ -1984,15 +1999,15 @@ Object next(Object previous) throws IOException { OrcStruct result = null; if (valuePresent) { if (previous == null) { - result = new OrcStruct(fields.length); + result = new 
OrcStruct(schemaEvolutionStructColumns); } else { result = (OrcStruct) previous; // If the input format was initialized with a file with a // different number of fields, the number of fields needs to // be updated to the correct number - if (result.getNumFields() != fields.length) { - result.setNumFields(fields.length); + if (result.getNumFields() != schemaEvolutionStructColumns) { + result.setNumFields(schemaEvolutionStructColumns); } } for (int i = 0; i < fields.length; ++i) { @@ -2000,6 +2015,12 @@ Object next(Object previous) throws IOException { result.setFieldValue(i, fields[i].next(result.getFieldValue(i))); } } + if (needsDefaultNulls) { + for (int i = fields.length; i < schemaEvolutionStructColumns; ++i) { + // Default new schema evolution fields to NULL. + result.setFieldValue(i, null); + } + } } return result; } @@ -2054,17 +2075,21 @@ void skipRows(long items) throws IOException { protected RunLengthByteReader tags; UnionTreeReader(int columnId, - List types, + List fileTypes, + List schemaEvolutionTypes, + int schemaEvolutionSubtype, + int schemaEvolutionStructOrigColumns, boolean[] included, boolean skipCorrupt) throws IOException { super(columnId); - OrcProto.Type type = types.get(columnId); + OrcProto.Type type = schemaEvolutionTypes.get(columnId); int fieldCount = type.getSubtypesCount(); this.fields = new TreeReader[fieldCount]; for (int i = 0; i < fieldCount; ++i) { int subtype = type.getSubtypes(i); if (included == null || included[subtype]) { - this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt); + this.fields[i] = createTreeReader(subtype, fileTypes, schemaEvolutionTypes, + schemaEvolutionSubtype, schemaEvolutionStructOrigColumns, included, skipCorrupt); } } } @@ -2134,12 +2159,16 @@ void skipRows(long items) throws IOException { protected IntegerReader lengths = null; ListTreeReader(int columnId, - List types, + List fileTypes, + List schemaEvolutionTypes, + int schemaEvolutionSubtype, + int schemaEvolutionStructOrigColumns, boolean[] included, boolean skipCorrupt) throws IOException { super(columnId); - OrcProto.Type type = types.get(columnId); - elementReader = createTreeReader(type.getSubtypes(0), types, included, skipCorrupt); + OrcProto.Type type = schemaEvolutionTypes.get(columnId); + elementReader = createTreeReader(type.getSubtypes(0), fileTypes, schemaEvolutionTypes, + schemaEvolutionSubtype, schemaEvolutionStructOrigColumns, included, skipCorrupt); } @Override @@ -2224,20 +2253,25 @@ void skipRows(long items) throws IOException { protected IntegerReader lengths = null; MapTreeReader(int columnId, - List types, + List fileTypes, + List schemaEvolutionTypes, + int schemaEvolutionSubtype, + int schemaEvolutionStructOrigColumns, boolean[] included, boolean skipCorrupt) throws IOException { super(columnId); - OrcProto.Type type = types.get(columnId); + OrcProto.Type type = schemaEvolutionTypes.get(columnId); int keyColumn = type.getSubtypes(0); int valueColumn = type.getSubtypes(1); if (included == null || included[keyColumn]) { - keyReader = createTreeReader(keyColumn, types, included, skipCorrupt); + keyReader = createTreeReader(keyColumn, fileTypes, schemaEvolutionTypes, + schemaEvolutionSubtype, schemaEvolutionStructOrigColumns, included, skipCorrupt); } else { keyReader = null; } if (included == null || included[valueColumn]) { - valueReader = createTreeReader(valueColumn, types, included, skipCorrupt); + valueReader = createTreeReader(valueColumn, fileTypes, schemaEvolutionTypes, + schemaEvolutionSubtype, 
schemaEvolutionStructOrigColumns, included, skipCorrupt); } else { valueReader = null; } @@ -2317,11 +2351,14 @@ void skipRows(long items) throws IOException { } public static TreeReader createTreeReader(int columnId, - List types, + List fileTypes, + List schemaEvolutionTypes, + int schemaEvolutionSubtype, + int schemaEvolutionStructOrigColumns, boolean[] included, boolean skipCorrupt ) throws IOException { - OrcProto.Type type = types.get(columnId); + OrcProto.Type type = schemaEvolutionTypes.get(columnId); switch (type.getKind()) { case BOOLEAN: return new BooleanTreeReader(columnId); @@ -2361,13 +2398,17 @@ public static TreeReader createTreeReader(int columnId, int scale = type.hasScale() ? type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE; return new DecimalTreeReader(columnId, precision, scale); case STRUCT: - return new StructTreeReader(columnId, types, included, skipCorrupt); + return new StructTreeReader(columnId, fileTypes, schemaEvolutionTypes, + schemaEvolutionSubtype, schemaEvolutionStructOrigColumns, included, skipCorrupt); case LIST: - return new ListTreeReader(columnId, types, included, skipCorrupt); + return new ListTreeReader(columnId, fileTypes, schemaEvolutionTypes, + schemaEvolutionSubtype, schemaEvolutionStructOrigColumns, included, skipCorrupt); case MAP: - return new MapTreeReader(columnId, types, included, skipCorrupt); + return new MapTreeReader(columnId, fileTypes, schemaEvolutionTypes, + schemaEvolutionSubtype, schemaEvolutionStructOrigColumns, included, skipCorrupt); case UNION: - return new UnionTreeReader(columnId, types, included, skipCorrupt); + return new UnionTreeReader(columnId, fileTypes, schemaEvolutionTypes, + schemaEvolutionSubtype, schemaEvolutionStructOrigColumns, included, skipCorrupt); default: throw new IllegalArgumentException("Unsupported type " + type.getKind()); diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 02fa725..7fd4822 100644 --- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -28,15 +28,19 @@ import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.CompactionType; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.RecordIdentifier; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatusWithId; @@ -99,12 +103,13 @@ public CompactorMR() { * @param conf Hive configuration file * @param jobName name to run this job with * @param t metastore table + * @param p metastore partition * @param sd metastore storage descriptor * @param txns list of valid transactions * @param 
isMajor is this a major compaction? * @throws java.io.IOException if the job fails */ - void run(HiveConf conf, String jobName, Table t, StorageDescriptor sd, + void run(HiveConf conf, String jobName, Table t, Partition p, StorageDescriptor sd, ValidTxnList txns, boolean isMajor, Worker.StatsUpdater su) throws IOException { JobConf job = new JobConf(conf); job.setJobName(jobName); @@ -127,7 +132,7 @@ void run(HiveConf conf, String jobName, Table t, StorageDescriptor sd, job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString()); job.setInt(NUM_BUCKETS, sd.getNumBuckets()); job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString()); - setColumnTypes(job, sd.getCols()); + setColumnTypes(job, t, p); // Figure out and encode what files we need to read. We do this here (rather than in // getSplits below) because as part of this we discover our minimum and maximum transactions, @@ -195,27 +200,29 @@ void run(HiveConf conf, String jobName, Table t, StorageDescriptor sd, } /** - * Set the column names and types into the job conf for the input format - * to use. + * Set the column names and types into the job conf for the ORC input format + * to use for schema evolution. * @param job the job to update - * @param cols the columns of the table + * @param the table + * @param the partition */ - private void setColumnTypes(JobConf job, List cols) { - StringBuilder colNames = new StringBuilder(); - StringBuilder colTypes = new StringBuilder(); - boolean isFirst = true; - for(FieldSchema col: cols) { - if (isFirst) { - isFirst = false; - } else { - colNames.append(','); - colTypes.append(','); - } - colNames.append(col.getName()); - colTypes.append(col.getType()); - } - job.set(serdeConstants.LIST_COLUMNS, colNames.toString()); - job.set(serdeConstants.LIST_COLUMN_TYPES, colTypes.toString()); + private void setColumnTypes(JobConf job, Table t, Partition p) { + + Properties properties = MetaStoreUtils.getPartitionMetadata(p, t); + + job.set(serdeConstants.LIST_COLUMNS, + properties.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS)); + job.set(serdeConstants.LIST_COLUMN_TYPES, + properties.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES)); + + job.set(hive_metastoreConstants.META_TABLE_COLUMNS, + properties.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS)); + job.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, + properties.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES)); + job.set(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, + properties.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS)); + job.set(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES, + properties.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES)); } static class CompactorInputSplit implements InputSplit { diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java index 0548117..8728bc9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java +++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java @@ -108,7 +108,7 @@ public void run() { final Table t = t1; // Find the partition we will be working with, if there is one. 
- Partition p = null; + final Partition p; try { p = resolvePartition(ci); if (p == null && ci.partName != null) { @@ -158,14 +158,14 @@ public void run() { launchedJob = true; try { if (runJobAsSelf(runAs)) { - mr.run(conf, jobName.toString(), t, sd, txns, isMajor, su); + mr.run(conf, jobName.toString(), t, p, sd, txns, isMajor, su); } else { UserGroupInformation ugi = UserGroupInformation.createProxyUser(t.getOwner(), UserGroupInformation.getLoginUser()); ugi.doAs(new PrivilegedExceptionAction() { @Override public Object run() throws Exception { - mr.run(conf, jobName.toString(), t, sd, txns, isMajor, su); + mr.run(conf, jobName.toString(), t, p, sd, txns, isMajor, su); return null; } }); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 8ba4d2e..7d1640a 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -144,6 +144,8 @@ public static String toKryo(SearchArgument sarg) { Date dateValue; Timestamp timestampValue; + String partitionValue; + BigRow(long x) { booleanValue = x % 2 == 0; byteValue = (byte) x; @@ -158,6 +160,8 @@ public static String toKryo(SearchArgument sarg) { millisUtc -= LOCAL_TIMEZONE.getOffset(millisUtc); dateValue = new Date(millisUtc); timestampValue = new Timestamp(millisUtc); + + partitionValue = "partition"; } @Override @@ -262,6 +266,10 @@ public String toString() { PrimitiveObjectInspectorFactory.javaDateObjectInspector)); FIELDS.add(new BigRowField(10, "timestampValue", PrimitiveObjectInspectorFactory.javaTimestampObjectInspector)); + + // Partition Column + FIELDS.add(new BigRowField(11, "p", + PrimitiveObjectInspectorFactory.javaStringObjectInspector)); } @@ -306,6 +314,8 @@ public Object getStructFieldData(Object data, StructField fieldRef) { return obj.dateValue; case 10: return obj.timestampValue; + case 11: + return obj.partitionValue; } throw new IllegalArgumentException("No such field " + fieldRef); } diff --git ql/src/test/queries/clientpositive/schema_evol_acid_table.q ql/src/test/queries/clientpositive/schema_evol_acid_table.q new file mode 100644 index 0000000..8b75222 --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_acid_table.q @@ -0,0 +1,15 @@ +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; + + +CREATE TABLE acid_partitioned2(a INT, b STRING) PARTITIONED BY(bkt INT) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true"); + +insert into table acid_partitioned2 partition(bkt=1) values(1, 'part one'),(2, 'part one'), (3, 'part two'),(4, 'part three'); + +alter table acid_partitioned2 add columns(c int, d string); + +insert into table acid_partitioned2 partition(bkt=2) values(1, 'part one', 10, 'str10'),(2, 'part one', 20, 'str20'), (3, 'part two', 30, 'str30'),(4, 'part three', 40, 'str40'); +insert into table acid_partitioned2 partition(bkt=1) values(5, 'part one', 1, 'blah'),(6, 'part one', 2, 'doh!'); + +select bkt,a,b,c,d from acid_partitioned2 order by a; \ No newline at end of file diff --git ql/src/test/results/clientpositive/schema_evol_acid_table.q.out ql/src/test/results/clientpositive/schema_evol_acid_table.q.out new file mode 100644 index 0000000..6df7993 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_acid_table.q.out @@ -0,0 +1,72 @@ +PREHOOK: query: CREATE TABLE 
acid_partitioned2(a INT, b STRING) PARTITIONED BY(bkt INT) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_partitioned2
+POSTHOOK: query: CREATE TABLE acid_partitioned2(a INT, b STRING) PARTITIONED BY(bkt INT) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ("transactional"="true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_partitioned2
+PREHOOK: query: insert into table acid_partitioned2 partition(bkt=1) values(1, 'part one'),(2, 'part one'), (3, 'part two'),(4, 'part three')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@acid_partitioned2@bkt=1
+POSTHOOK: query: insert into table acid_partitioned2 partition(bkt=1) values(1, 'part one'),(2, 'part one'), (3, 'part two'),(4, 'part three')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@acid_partitioned2@bkt=1
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: alter table acid_partitioned2 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@acid_partitioned2
+PREHOOK: Output: default@acid_partitioned2
+POSTHOOK: query: alter table acid_partitioned2 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@acid_partitioned2
+POSTHOOK: Output: default@acid_partitioned2
+PREHOOK: query: insert into table acid_partitioned2 partition(bkt=2) values(1, 'part one', 10, 'str10'),(2, 'part one', 20, 'str20'), (3, 'part two', 30, 'str30'),(4, 'part three', 40, 'str40')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@acid_partitioned2@bkt=2
+POSTHOOK: query: insert into table acid_partitioned2 partition(bkt=2) values(1, 'part one', 10, 'str10'),(2, 'part one', 20, 'str20'), (3, 'part two', 30, 'str30'),(4, 'part three', 40, 'str40')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@acid_partitioned2@bkt=2
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: insert into table acid_partitioned2 partition(bkt=1) values(5, 'part one', 1, 'blah'),(6, 'part one', 2, 'doh!')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@acid_partitioned2@bkt=1
+POSTHOOK: query: insert into table acid_partitioned2 partition(bkt=1) values(5, 'part one', 1, 'blah'),(6, 'part one', 2, 'doh!')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@acid_partitioned2@bkt=1
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: acid_partitioned2 PARTITION(bkt=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+PREHOOK: query: select bkt,a,b,c,d from acid_partitioned2 order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_partitioned2
+PREHOOK: Input: default@acid_partitioned2@bkt=1
+PREHOOK: Input: default@acid_partitioned2@bkt=2
+#### A masked pattern was here ####
+POSTHOOK: query: select bkt,a,b,c,d from acid_partitioned2 order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_partitioned2
+POSTHOOK: Input: default@acid_partitioned2@bkt=1
+POSTHOOK: Input: default@acid_partitioned2@bkt=2
+#### A masked pattern was here ####
+2 1 part one 10 str10
+1 1 part one NULL NULL
+2 2 part one 20 str20
+1 2 part one NULL NULL
+1 3 part two NULL NULL
+2 3 part two 30 str30
+2 4 part three 40 str40
+1 4 part three NULL NULL
+1 5 part one NULL NULL
+1 6 part one NULL NULL
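The q.out above shows the intended behavior of the patch: rows written to bkt=1 before the ALTER TABLE come back with the newly added columns c and d defaulted to NULL, while rows written after the ALTER TABLE carry real values. The standalone sketch below is not part of the patch; SchemaEvolutionPadExample and padToEvolvedSchema are illustrative names. It only mirrors the idea behind the needsDefaultNulls branch added to StructTreeReader: keep the fields that physically exist in the ORC file and fill the trailing, schema-evolved fields with NULL.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Simplified illustration (not Hive code): pad a row read with the file's
 * original column count out to the evolved schema's column count, defaulting
 * the new trailing columns to NULL, as the patched StructTreeReader does when
 * needsDefaultNulls is set.
 */
public class SchemaEvolutionPadExample {

  static List<Object> padToEvolvedSchema(List<Object> fileRow, int evolvedColumnCount) {
    List<Object> result = new ArrayList<>(evolvedColumnCount);
    // Columns that were physically written to the ORC file.
    result.addAll(fileRow);
    // Columns added later by ALTER TABLE ... ADD COLUMNS default to NULL.
    for (int i = fileRow.size(); i < evolvedColumnCount; i++) {
      result.add(null);
    }
    return result;
  }

  public static void main(String[] args) {
    // A row written before "alter table acid_partitioned2 add columns(c int, d string)".
    List<Object> oldRow = Arrays.asList(1, "part one");
    // Read it against the evolved 4-column row schema (a, b, c, d).
    System.out.println(padToEvolvedSchema(oldRow, 4));
  }
}

Running main prints [1, part one, null, null], which corresponds to the NULL NULL values seen above for the bkt=1 rows inserted before the ALTER TABLE.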