diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 6261a14..969b73b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -1159,7 +1159,7 @@ private AcidDirInfo callInternal() throws IOException {
     private final long blockSize;
     private final TreeMap<Long, BlockLocation> locations;
     private OrcTail orcTail;
-    private final List<OrcProto.Type> readerTypes;
+    private List<OrcProto.Type> readerTypes;
     private List<StripeInformation> stripes;
     private List<StripeStatistics> stripeStats;
     private List<OrcProto.Type> fileTypes;
@@ -1492,6 +1492,11 @@ private void populateAndCacheStripeDetails() throws IOException {
         }
         TypeDescription readerSchema = OrcUtils.convertTypeFromProtobuf(readerTypes, 0);
         evolution = new SchemaEvolution(fileSchema, readerSchema, readerIncluded);
+        if (!isOriginal) {
+          // The SchemaEvolution class has added the ACID metadata columns. Let's update our
+          // readerTypes so PPD code will work correctly.
+          readerTypes = OrcUtils.getOrcTypes(evolution.getReaderSchema());
+        }
       }
       writerVersion = orcTail.getWriterVersion();
       List<OrcProto.ColumnStatistics> fileColStats = orcTail.getFooter().getStatisticsList();
@@ -1508,21 +1513,24 @@ private void populateAndCacheStripeDetails() throws IOException {
           }
         }
       }
-      projColsUncompressedSize = computeProjectionSize(fileTypes, fileColStats, fileIncluded,
-          isOriginal);
+      projColsUncompressedSize = computeProjectionSize(fileTypes, fileColStats, fileIncluded);
       if (!context.footerInSplits) {
         orcTail = null;
       }
     }

     private long computeProjectionSize(List<OrcProto.Type> fileTypes,
-        List<OrcProto.ColumnStatistics> stats, boolean[] fileIncluded, boolean isOriginal) {
-      final int rootIdx = getRootColumn(isOriginal);
+        List<OrcProto.ColumnStatistics> stats, boolean[] fileIncluded) {
       List<Integer> internalColIds = Lists.newArrayList();
-      if (fileIncluded != null) {
+      if (fileIncluded == null) {
+        // Add all columns.
+        for (int i = 0; i < fileTypes.size(); i++) {
+          internalColIds.add(i);
+        }
+      } else {
         for (int i = 0; i < fileIncluded.length; i++) {
           if (fileIncluded[i]) {
-            internalColIds.add(rootIdx + i);
+            internalColIds.add(i);
           }
         }
       }
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 08ca9d5..b8125dd 100644
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -1296,6 +1296,41 @@ protected void testACIDwithSchemaEvolutionForVariousTblProperties(String tblProp
     Assert.assertEquals(Arrays.asList(expectedResult), rs);
   }

+  @Test
+  public void testETLSplitStrategyForACID() throws Exception {
+    hiveConf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, "ETL");
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, true);
+    runStatementOnDriver("insert into " + Table.ACIDTBL + " values(1,2)");
+    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
+    runWorker(hiveConf);
+    List<String> rs = runStatementOnDriver("select * from " + Table.ACIDTBL + " where a = 1");
+    int[][] resultData = new int[][] {{1,2}};
+    Assert.assertEquals(stringifyValues(resultData), rs);
+  }
+
+  @Test
+  public void testAcidWithSchemaEvolution() throws Exception {
+    hiveConf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, "ETL");
+    String tblName = "acidTblWithSchemaEvol";
+    runStatementOnDriver("drop table if exists " + tblName);
+    runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
+        " CLUSTERED BY(a) INTO 2 BUCKETS" + // currently ACID requires the table to be bucketed
+        " STORED AS ORC TBLPROPERTIES ('transactional'='true')");
+
+    runStatementOnDriver("INSERT INTO " + tblName + " VALUES (1, 'foo'), (2, 'bar')");
+
+    // Major compact to create a base that has the ACID schema.
+    runStatementOnDriver("ALTER TABLE " + tblName + " COMPACT 'MAJOR'");
+    runWorker(hiveConf);
+
+    // Alter the table to perform schema evolution.
+    runStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)");
+
+    // Validate that the new column c reads back as NULL.
+    List<String> rs = runStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a");
+    String[] expectedResult = { "1\tfoo\tNULL", "2\tbar\tNULL" };
+    Assert.assertEquals(Arrays.asList(expectedResult), rs);
+  }
 /**
  * takes raw data and turns it into a string as if from Driver.getResults()
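
Note on the fix (illustrative, not part of the patch): for non-"original" files, SchemaEvolution wraps the table's row schema in the ORC ACID event struct, so every leaf column id in the reader schema shifts past the five ACID metadata columns. PPD resolves SearchArgument leaves against the reader schema, which is why readerTypes must be refreshed after SchemaEvolution runs. The sketch below reproduces that wrapper with the public org.apache.orc TypeDescription API; the class and method names are made up for illustration, and the field list is the standard ACID event layout rather than a copy of Hive's internal code.

    import org.apache.orc.TypeDescription;

    public class AcidSchemaSketch {
      // Wrap a row schema in the ACID event struct, mirroring what
      // SchemaEvolution produces for non-"original" ACID files (assumed layout).
      static TypeDescription wrapInAcidEventSchema(TypeDescription rowSchema) {
        return TypeDescription.createStruct()
            .addField("operation", TypeDescription.createInt())
            .addField("originalTransaction", TypeDescription.createLong())
            .addField("bucket", TypeDescription.createInt())
            .addField("rowId", TypeDescription.createLong())
            .addField("currentTransaction", TypeDescription.createLong())
            .addField("row", rowSchema);
      }

      public static void main(String[] args) {
        TypeDescription row = TypeDescription.fromString("struct<a:int,b:string>");
        // In the plain table schema column "a" has id 1; inside the ACID event
        // schema it becomes id 7, so a stale readerTypes list mis-aligns the
        // column ids that predicate pushdown evaluates.
        System.out.println(wrapInAcidEventSchema(row));
      }
    }

The two new tests exercise exactly this path: both force the ETL split strategy, and testETLSplitStrategyForACID additionally enables hive.optimize.index.filter so predicate pushdown runs against a major-compacted (ACID-schema) base file.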