diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index ca254492a1..33b86f23b1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.ql.io.orc;
 
+import org.apache.commons.collections.CollectionUtils;
 import org.apache.hadoop.hive.common.NoDynamicValuesException;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -338,16 +339,13 @@ public static RecordReader createReaderFromFile(Reader file,
   }
 
   public static boolean isOriginal(Reader file) {
-    return !file.hasMetadataValue(OrcRecordUpdater.ACID_KEY_INDEX_NAME);
+    return !CollectionUtils.isEqualCollection(file.getSchema().getFieldNames(),
+        OrcRecordUpdater.ALL_ACID_ROWS);
   }
 
   public static boolean isOriginal(Footer footer) {
-    for(OrcProto.UserMetadataItem item: footer.getMetadataList()) {
-      if (item.hasName() && item.getName().equals(OrcRecordUpdater.ACID_KEY_INDEX_NAME)) {
-        return true;
-      }
-    }
-    return false;
+    return !CollectionUtils.isEqualCollection(footer.getTypesList().get(0).getFieldNamesList(),
+        OrcRecordUpdater.ALL_ACID_ROWS);
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
index 21fe9ceff3..a90806c8ae 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
@@ -24,6 +24,7 @@
 import java.nio.charset.CharsetDecoder;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
 import java.util.Properties;
@@ -83,6 +84,20 @@
   final static int ROW_ID = 3;
   final static int CURRENT_WRITEID = 4;
   public static final int ROW = 5;
+  final static String OPERATION_FIELD_NAME = "operation";
+  final static String ORIGINAL_WRITEID_FIELD_NAME = "originalTransaction";
+  final static String BUCKET_FIELD_NAME = "bucket";
+  final static String ROW_ID_FIELD_NAME = "rowId";
+  final static String CURRENT_WRITEID_FIELD_NAME = "currentTransaction";
+  final static String ROW_FIELD_NAME = "row";
+  public final static Collection<String> ALL_ACID_ROWS = Arrays.asList(
+      OrcRecordUpdater.BUCKET_FIELD_NAME,
+      OrcRecordUpdater.CURRENT_WRITEID_FIELD_NAME,
+      OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME,
+      OrcRecordUpdater.OPERATION_FIELD_NAME,
+      OrcRecordUpdater.ROW_FIELD_NAME,
+      OrcRecordUpdater.ROW_ID_FIELD_NAME);
+
   /**
    * total number of fields (above)
    */
@@ -190,17 +205,17 @@ OrcOptions orcOptions(OrcFile.WriterOptions opts) {
    */
  static StructObjectInspector createEventObjectInspector(ObjectInspector rowInspector) {
    List fields = new ArrayList();
-    fields.add(new OrcStruct.Field("operation",
+    fields.add(new OrcStruct.Field(OPERATION_FIELD_NAME,
        PrimitiveObjectInspectorFactory.writableIntObjectInspector, OPERATION));
-    fields.add(new OrcStruct.Field("originalTransaction",
+    fields.add(new OrcStruct.Field(ORIGINAL_WRITEID_FIELD_NAME,
        PrimitiveObjectInspectorFactory.writableLongObjectInspector, ORIGINAL_WRITEID));
-    fields.add(new OrcStruct.Field("bucket",
+    fields.add(new OrcStruct.Field(BUCKET_FIELD_NAME,
        PrimitiveObjectInspectorFactory.writableIntObjectInspector, BUCKET));
-    fields.add(new OrcStruct.Field("rowId",
+    fields.add(new OrcStruct.Field(ROW_ID_FIELD_NAME,
        PrimitiveObjectInspectorFactory.writableLongObjectInspector, ROW_ID));
-    fields.add(new OrcStruct.Field("currentTransaction",
+    fields.add(new OrcStruct.Field(CURRENT_WRITEID_FIELD_NAME,
        PrimitiveObjectInspectorFactory.writableLongObjectInspector, CURRENT_WRITEID));
-    fields.add(new OrcStruct.Field("row", rowInspector, ROW));
+    fields.add(new OrcStruct.Field(ROW_FIELD_NAME, rowInspector, ROW));
    return new OrcStruct.OrcStructInspector(fields);
  }
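Note (illustrative, not part of the patch): with this change isOriginal() no longer looks for the ACID key index in the file's user metadata; it compares the top-level field names of the schema against the six ACID event columns. CollectionUtils.isEqualCollection() matches elements by cardinality rather than by position, so the declaration order of the columns does not matter. Below is a minimal standalone sketch of that comparison; the class name, method name, and sample column lists are invented for illustration, and the six names simply mirror the constants the patch adds to OrcRecordUpdater (ALL_ACID_ROWS).

import java.util.Arrays;
import java.util.Collection;
import org.apache.commons.collections.CollectionUtils;

public class AcidSchemaCheckSketch {
  // Same six field names the patch introduces in OrcRecordUpdater.
  private static final Collection<String> ACID_EVENT_FIELD_NAMES = Arrays.asList(
      "operation", "originalTransaction", "bucket",
      "rowId", "currentTransaction", "row");

  // True when the root struct does NOT carry the ACID event columns,
  // i.e. the file is a pre-ACID ("original") file.
  static boolean isOriginalSchema(Collection<String> topLevelFieldNames) {
    return !CollectionUtils.isEqualCollection(topLevelFieldNames, ACID_EVENT_FIELD_NAMES);
  }

  public static void main(String[] args) {
    // An ACID base/delta file exposes exactly the six event columns at the root.
    System.out.println(isOriginalSchema(Arrays.asList(
        "operation", "originalTransaction", "bucket",
        "rowId", "currentTransaction", "row")));                       // false
    // A file written before the table became ACID exposes the user's own columns.
    System.out.println(isOriginalSchema(Arrays.asList("id", "name"))); // true
  }
}

Any file whose root struct carries exactly these six names, in any order, is treated as an ACID file; everything else is treated as original.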
OrcStruct.Field("currentTransaction", + fields.add(new OrcStruct.Field(CURRENT_WRITEID_FIELD_NAME, PrimitiveObjectInspectorFactory.writableLongObjectInspector, CURRENT_WRITEID)); - fields.add(new OrcStruct.Field("row", rowInspector, ROW)); + fields.add(new OrcStruct.Field(ROW_FIELD_NAME, rowInspector, ROW)); return new OrcStruct.OrcStructInspector(fields); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java index 1795bb5457..71348a9cdc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java @@ -318,11 +318,11 @@ private void setSARG(OrcRawRecordMerger.KeyInterval keyInterval, RecordIdentifier k = keyInterval.getMinKey(); b = SearchArgumentFactory.newBuilder(); b.startAnd() //not(ot < 7) -> ot >=7 - .startNot().lessThan("originalTransaction", + .startNot().lessThan(OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME, PredicateLeaf.Type.LONG, k.getWriteId()).end(); b.startNot().lessThan( - "bucket", PredicateLeaf.Type.LONG, minBucketProp).end(); - b.startNot().lessThan("rowId", + OrcRecordUpdater.BUCKET_FIELD_NAME, PredicateLeaf.Type.LONG, minBucketProp).end(); + b.startNot().lessThan(OrcRecordUpdater.ROW_ID_FIELD_NAME, PredicateLeaf.Type.LONG, minRowId).end(); b.end(); } @@ -332,16 +332,20 @@ private void setSARG(OrcRawRecordMerger.KeyInterval keyInterval, b = SearchArgumentFactory.newBuilder(); } b.startAnd().lessThanEquals( - "originalTransaction", PredicateLeaf.Type.LONG, k.getWriteId()); - b.lessThanEquals("bucket", PredicateLeaf.Type.LONG, maxBucketProp); - b.lessThanEquals("rowId", PredicateLeaf.Type.LONG, maxRowId); + OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME, PredicateLeaf.Type.LONG, k.getWriteId()); + b.lessThanEquals(OrcRecordUpdater.BUCKET_FIELD_NAME, PredicateLeaf.Type.LONG, maxBucketProp); + b.lessThanEquals(OrcRecordUpdater.ROW_ID_FIELD_NAME, PredicateLeaf.Type.LONG, maxRowId); b.end(); } if(b != null) { deleteEventSarg = b.build(); LOG.info("deleteReader SARG(" + deleteEventSarg + ") "); deleteEventReaderOptions.searchArgument(deleteEventSarg, - new String[] {"originalTransaction", "bucket", "rowId"}); + new String[] { + OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME, + OrcRecordUpdater.BUCKET_FIELD_NAME, + OrcRecordUpdater.ROW_ID_FIELD_NAME + }); return; } deleteEventReaderOptions.searchArgument(null, null); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java index 1656a5b80e..bda19badbe 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java @@ -395,8 +395,13 @@ public void testNewBase() throws Exception { typeBuilder.setKind(OrcProto.Type.Kind.STRUCT).addSubtypes(1) .addSubtypes(2).addSubtypes(3).addSubtypes(4).addSubtypes(5) .addSubtypes(6); - typeBuilder.addAllFieldNames(Lists.newArrayList("operation", "originalTransaction", "bucket", - "rowId", "currentTransaction", "row")); + typeBuilder.addAllFieldNames(Lists.newArrayList( + OrcRecordUpdater.OPERATION_FIELD_NAME, + OrcRecordUpdater.CURRENT_WRITEID_FIELD_NAME, + OrcRecordUpdater.BUCKET_FIELD_NAME, + OrcRecordUpdater.ROW_ID_FIELD_NAME, + OrcRecordUpdater.CURRENT_WRITEID_FIELD_NAME, + OrcRecordUpdater.ROW_FIELD_NAME)); types.add(typeBuilder.build()); types.add(null); 
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
index 1656a5b80e..bda19badbe 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java
@@ -395,8 +395,13 @@ public void testNewBase() throws Exception {
    typeBuilder.setKind(OrcProto.Type.Kind.STRUCT).addSubtypes(1)
        .addSubtypes(2).addSubtypes(3).addSubtypes(4).addSubtypes(5)
        .addSubtypes(6);
-    typeBuilder.addAllFieldNames(Lists.newArrayList("operation", "originalTransaction", "bucket",
-        "rowId", "currentTransaction", "row"));
+    typeBuilder.addAllFieldNames(Lists.newArrayList(
+        OrcRecordUpdater.OPERATION_FIELD_NAME,
+        OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME,
+        OrcRecordUpdater.BUCKET_FIELD_NAME,
+        OrcRecordUpdater.ROW_ID_FIELD_NAME,
+        OrcRecordUpdater.CURRENT_WRITEID_FIELD_NAME,
+        OrcRecordUpdater.ROW_FIELD_NAME));
    types.add(typeBuilder.build());
    types.add(null);
    types.add(null);
@@ -478,15 +483,15 @@ public void testNewBase() throws Exception {
    List fields = eventObjectInspector.getAllStructFieldRefs();
    assertEquals(OrcRecordUpdater.FIELDS, fields.size());
-    assertEquals("operation",
+    assertEquals(OrcRecordUpdater.OPERATION_FIELD_NAME,
        fields.get(OrcRecordUpdater.OPERATION).getFieldName());
-    assertEquals("currentTransaction",
+    assertEquals(OrcRecordUpdater.CURRENT_WRITEID_FIELD_NAME,
        fields.get(OrcRecordUpdater.CURRENT_WRITEID).getFieldName());
-    assertEquals("originalTransaction",
+    assertEquals(OrcRecordUpdater.ORIGINAL_WRITEID_FIELD_NAME,
        fields.get(OrcRecordUpdater.ORIGINAL_WRITEID).getFieldName());
-    assertEquals("bucket",
+    assertEquals(OrcRecordUpdater.BUCKET_FIELD_NAME,
        fields.get(OrcRecordUpdater.BUCKET).getFieldName());
-    assertEquals("rowId",
+    assertEquals(OrcRecordUpdater.ROW_ID_FIELD_NAME,
        fields.get(OrcRecordUpdater.ROW_ID).getFieldName());
    StructObjectInspector rowObjectInspector =
        (StructObjectInspector) fields.get(OrcRecordUpdater.ROW)