diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 334cb31..eeacadb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -1073,7 +1073,7 @@ private AcidDirInfo callInternal() throws IOException {
     private final long blockSize;
     private final TreeMap<Long, BlockLocation> locations;
     private OrcTail orcTail;
-    private final List<OrcProto.Type> readerTypes;
+    private List<OrcProto.Type> readerTypes;
     private List<StripeInformation> stripes;
     private List<StripeStatistics> stripeStats;
     private List<OrcProto.Type> fileTypes;
@@ -1406,6 +1406,11 @@ private void populateAndCacheStripeDetails() throws IOException {
         }
         TypeDescription readerSchema = OrcUtils.convertTypeFromProtobuf(readerTypes, 0);
         evolution = new SchemaEvolution(fileSchema, readerSchema, readerIncluded);
+        if (!isOriginal) {
+          // The SchemaEvolution class has added the ACID metadata columns. Update our
+          // readerTypes so the PPD code will work correctly.
+          readerTypes = OrcUtils.getOrcTypes(evolution.getReaderSchema());
+        }
       }
       writerVersion = orcTail.getWriterVersion();
       List<OrcProto.ColumnStatistics> fileColStats = orcTail.getFooter().getStatisticsList();
@@ -1422,8 +1427,9 @@
         }
       }
     }
+    // fileIncluded has the ACID columns, so always pass true for isOriginal.
     projColsUncompressedSize = computeProjectionSize(fileTypes, fileColStats, fileIncluded,
-        isOriginal);
+        /* isOriginal */ true);
     if (!context.footerInSplits) {
       orcTail = null;
     }
diff --git ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index af192fb..22b89c5 100644
--- ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -1215,12 +1215,49 @@ public void testNoHistory() throws Exception {
     hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, true);
     runStatementOnDriver("insert into " + Table.ACIDTBL + "(a,b) " + makeValuesClause(tableData));
     hiveConf.setBoolVar(HiveConf.ConfVars.HIVETESTMODEROLLBACKTXN, false);
-
+
     runStatementOnDriver("alter table "+ Table.ACIDTBL + " compact 'MAJOR'");
     runWorker(hiveConf);
     runCleaner(hiveConf);
     runStatementOnDriver("select count(*) from " + Table.ACIDTBL);
   }
+
+  @Test
+  public void testETLSplitStrategyForACID() throws Exception {
+    hiveConf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, "ETL");
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVEOPTINDEXFILTER, true);
+    runStatementOnDriver("insert into " + Table.ACIDTBL + " values(1,2)");
+    runStatementOnDriver("alter table " + Table.ACIDTBL + " compact 'MAJOR'");
+    runWorker(hiveConf);
+    List<String> rs = runStatementOnDriver("select * from " + Table.ACIDTBL + " where a = 1");
+    int[][] resultData = new int[][] {{1,2}};
+    Assert.assertEquals(stringifyValues(resultData), rs);
+  }
+
+  @Test
+  public void testAcidWithSchemaEvolution() throws Exception {
+    hiveConf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, "ETL");
+    String tblName = "acidTblWithSchemaEvol";
+    runStatementOnDriver("drop table if exists " + tblName);
+    runStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
+        " CLUSTERED BY(a) INTO 2 BUCKETS" + // currently ACID requires the table to be bucketed
+        " STORED AS ORC TBLPROPERTIES ('transactional'='true')");
+
+    runStatementOnDriver("INSERT INTO " + tblName + " VALUES (1, 'foo'), (2, 'bar')");
+
+    // Major compact to create a base that has the ACID schema.
+    runStatementOnDriver("ALTER TABLE " + tblName + " COMPACT 'MAJOR'");
+    runWorker(hiveConf);
+
+    // Alter the table to perform schema evolution.
+    runStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)");
+
+    // Validate that the added column c reads back as NULL.
+    List<String> rs = runStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a");
+    String[] expectedResult = { "1\tfoo\tNULL", "2\tbar\tNULL" };
+    Assert.assertEquals(Arrays.asList(expectedResult), rs);
+  }
+
   /**
    * takes raw data and turns it into a string as if from Driver.getResults()
    * sorts rows in dictionary order