diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index e793174..787e25e 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -910,6 +910,10 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) {
         "The threshold for the input file size of the small tables; if the file size is smaller \n" +
         "than this threshold, it will try to convert the common join into map join"),
+
+    HIVE_SCHEMA_EVOLUTION("hive.exec.schema.evolution", false,
+        "Use schema evolution to convert self-describing file format's data to the schema desired by the reader."),
+
     HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0,
         "A number used to percentage sampling. By changing this number, user will change the subsets of data sampled."),
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java
index 5a95467..bc56d77 100644
--- hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -35,6 +36,10 @@ import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hive.hcatalog.common.HCatConstants;
 import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -48,6 +53,8 @@
  */
 public class FosterStorageHandler extends DefaultStorageHandler {
 
+  private static final Logger LOG = LoggerFactory.getLogger(FosterStorageHandler.class);
+
   public Configuration conf;
   /** The directory under which data is initially written for a non partitioned table */
   protected static final String TEMP_DIR_NAME = "_TEMP";
@@ -98,6 +105,36 @@ public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
   @Override
   public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
+
+    try {
+      Map<String, String> tableProperties = tableDesc.getJobProperties();
+
+      String jobInfoProperty = tableProperties.get(HCatConstants.HCAT_KEY_JOB_INFO);
+      if (jobInfoProperty != null) {
+
+        InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobInfoProperty);
+
+        HCatTableInfo tableInfo = inputJobInfo.getTableInfo();
+        HCatSchema dataColumns = tableInfo.getDataColumns();
+        List<HCatFieldSchema> dataFields = dataColumns.getFields();
+        StringBuilder columnNamesSb = new StringBuilder();
+        StringBuilder typeNamesSb = new StringBuilder();
+        for (HCatFieldSchema dataField : dataFields) {
+          if (columnNamesSb.length() > 0) {
+            columnNamesSb.append(",");
+            typeNamesSb.append(":");
+          }
+          columnNamesSb.append(dataField.getName());
+          typeNamesSb.append(dataField.getTypeString());
+        }
+        jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS, columnNamesSb.toString());
+        jobProperties.put(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, typeNamesSb.toString());
+
+      }
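+      // Example (illustrative): for a table declared as (id BIGINT, msg STRING),
+      // the loop above publishes
+      //   schema.evolution.columns       = "id,msg"
+      //   schema.evolution.columns.types = "bigint:string"
+      // so a self-describing reader (e.g. ORC) can reconcile the file schema
+      // with the reader schema.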
+    } catch (IOException e) {
+      throw new IllegalStateException("Failed to set schema evolution properties", e);
+    }
+  }
 
   @Override
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java
index 1f23f3f..7ec6ae3 100644
--- hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java
@@ -182,9 +182,11 @@ private void readObject(ObjectInputStream ois)
     ObjectInputStream partInfoReader =
         new ObjectInputStream(new InflaterInputStream(ois));
     partitions = (List<PartInfo>)partInfoReader.readObject();
-    for (PartInfo partInfo : partitions) {
-      if (partInfo.getTableInfo() == null) {
-        partInfo.setTableInfo(this.tableInfo);
+    if (partitions != null) {
+      for (PartInfo partInfo : partitions) {
+        if (partInfo.getTableInfo() == null) {
+          partInfo.setTableInfo(this.tableInfo);
+        }
       }
     }
   }
diff --git hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
index 3458b65..ff2598f 100644
--- hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
+++ hcatalog/streaming/src/test/org/apache/hive/hcatalog/streaming/TestStreaming.java
@@ -464,6 +464,8 @@ private void checkDataWritten(Path partitionPath, long minTxn, long maxTxn, int
     JobConf job = new JobConf();
     job.set("mapred.input.dir", partitionPath.toString());
     job.set("bucket_count", Integer.toString(buckets));
+    job.set("columns", "id,msg");
+    job.set("columns.types", "bigint:string");
     job.set(ValidTxnList.VALID_TXNS_KEY, txns.toString());
     InputSplit[] splits = inf.getSplits(job, buckets);
     Assert.assertEquals(buckets, splits.length);
diff --git itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index e2910dd..dabe434 100644
--- itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -128,7 +128,194 @@ public void tearDown() {
       driver.close();
     }
   }
-
+
+  /**
+   * Simple schema evolution: add columns with partitioning.
+   * @throws Exception
+   */
+  @Test
+  public void schemaEvolutionAddColDynamicPartitioningInsert() throws Exception {
+    String tblName = "dpct";
+    List<String> colNames = Arrays.asList("a", "b");
+    executeStatementOnDriver("drop table if exists " + tblName, driver);
+    executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
+        " PARTITIONED BY(ds string)" +
+        " CLUSTERED BY(a) INTO 2 BUCKETS" + //currently ACID requires table to be bucketed
+        " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
+
+    // First INSERT round.
+    executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " +
+        "'today'), (2, 'wilma', 'yesterday')", driver);
+
+    // ALTER TABLE ... ADD COLUMNS
+    executeStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)", driver);
+
+    // Validate there is an added NULL for column c.
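+    // (Rows inserted before the ALTER TABLE live in ORC files whose schema lacks
+    // column c; the reader must synthesize NULLs for it via schema evolution.)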
+ executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver); + ArrayList valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + Assert.assertEquals(2, valuesReadFromHiveDriver.size()); + Assert.assertEquals("1\tfred\tNULL\ttoday", valuesReadFromHiveDriver.get(0)); + Assert.assertEquals("2\twilma\tNULL\tyesterday", valuesReadFromHiveDriver.get(1)); + + // Second INSERT round with new inserts into previously existing partition 'yesterday'. + executeStatementOnDriver("insert into " + tblName + " partition (ds) values " + + "(3, 'mark', 1900, 'soon'), (4, 'douglas', 1901, 'last_century'), " + + "(5, 'doc', 1902, 'yesterday')", + driver); + + // Validate there the new insertions for column c. + executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver); + valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + Assert.assertEquals(5, valuesReadFromHiveDriver.size()); + Assert.assertEquals("1\tfred\tNULL\ttoday", valuesReadFromHiveDriver.get(0)); + Assert.assertEquals("2\twilma\tNULL\tyesterday", valuesReadFromHiveDriver.get(1)); + Assert.assertEquals("3\tmark\t1900\tsoon", valuesReadFromHiveDriver.get(2)); + Assert.assertEquals("4\tdouglas\t1901\tlast_century", valuesReadFromHiveDriver.get(3)); + Assert.assertEquals("5\tdoc\t1902\tyesterday", valuesReadFromHiveDriver.get(4)); + + Initiator initiator = new Initiator(); + initiator.setThreadId((int)initiator.getId()); + conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0); + initiator.setHiveConf(conf); + AtomicBoolean stop = new AtomicBoolean(); + stop.set(true); + initiator.init(stop, new AtomicBoolean()); + initiator.run(); + + CompactionTxnHandler txnHandler = new CompactionTxnHandler(conf); + ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); + List compacts = rsp.getCompacts(); + Assert.assertEquals(4, compacts.size()); + SortedSet partNames = new TreeSet(); + for (int i = 0; i < compacts.size(); i++) { + Assert.assertEquals("default", compacts.get(i).getDbname()); + Assert.assertEquals(tblName, compacts.get(i).getTablename()); + Assert.assertEquals("initiated", compacts.get(i).getState()); + partNames.add(compacts.get(i).getPartitionname()); + } + List names = new ArrayList(partNames); + Assert.assertEquals("ds=last_century", names.get(0)); + Assert.assertEquals("ds=soon", names.get(1)); + Assert.assertEquals("ds=today", names.get(2)); + Assert.assertEquals("ds=yesterday", names.get(3)); + + // Validate after compaction. 
+ executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver); + valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + Assert.assertEquals(5, valuesReadFromHiveDriver.size()); + Assert.assertEquals("1\tfred\tNULL\ttoday", valuesReadFromHiveDriver.get(0)); + Assert.assertEquals("2\twilma\tNULL\tyesterday", valuesReadFromHiveDriver.get(1)); + Assert.assertEquals("3\tmark\t1900\tsoon", valuesReadFromHiveDriver.get(2)); + Assert.assertEquals("4\tdouglas\t1901\tlast_century", valuesReadFromHiveDriver.get(3)); + Assert.assertEquals("5\tdoc\t1902\tyesterday", valuesReadFromHiveDriver.get(4)); + + } + + @Test + public void schemaEvolutionAddColDynamicPartitioningUpdate() throws Exception { + String tblName = "udpct"; + List colNames = Arrays.asList("a", "b"); + executeStatementOnDriver("drop table if exists " + tblName, driver); + executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + + " PARTITIONED BY(ds string)" + + " CLUSTERED BY(a) INTO 2 BUCKETS" + //currently ACID requires table to be bucketed + " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver); + executeStatementOnDriver("insert into " + tblName + " partition (ds) values (1, 'fred', " + + "'today'), (2, 'wilma', 'yesterday')", driver); + + executeStatementOnDriver("update " + tblName + " set b = 'barney'", driver); + + // Validate the update. + executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver); + ArrayList valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + Assert.assertEquals(2, valuesReadFromHiveDriver.size()); + Assert.assertEquals("1\tbarney\ttoday", valuesReadFromHiveDriver.get(0)); + Assert.assertEquals("2\tbarney\tyesterday", valuesReadFromHiveDriver.get(1)); + + // ALTER TABLE ... ADD COLUMNS + executeStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(c int)", driver); + + // Validate there is an added NULL for column c. + executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver); + valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + Assert.assertEquals(2, valuesReadFromHiveDriver.size()); + Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0)); + Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1)); + + // Second INSERT round with new inserts into previously existing partition 'yesterday'. + executeStatementOnDriver("insert into " + tblName + " partition (ds) values " + + "(3, 'mark', 1900, 'soon'), (4, 'douglas', 1901, 'last_century'), " + + "(5, 'doc', 1902, 'yesterday')", + driver); + + // Validate there the new insertions for column c. + executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver); + valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + Assert.assertEquals(5, valuesReadFromHiveDriver.size()); + Assert.assertEquals("1\tbarney\tNULL\ttoday", valuesReadFromHiveDriver.get(0)); + Assert.assertEquals("2\tbarney\tNULL\tyesterday", valuesReadFromHiveDriver.get(1)); + Assert.assertEquals("3\tmark\t1900\tsoon", valuesReadFromHiveDriver.get(2)); + Assert.assertEquals("4\tdouglas\t1901\tlast_century", valuesReadFromHiveDriver.get(3)); + Assert.assertEquals("5\tdoc\t1902\tyesterday", valuesReadFromHiveDriver.get(4)); + + executeStatementOnDriver("update " + tblName + " set c = 2000", driver); + + // Validate the update of new column c, even in old rows. 
+ executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver); + valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + Assert.assertEquals(5, valuesReadFromHiveDriver.size()); + Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0)); + Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1)); + Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2)); + Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3)); + Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4)); + + Initiator initiator = new Initiator(); + initiator.setThreadId((int)initiator.getId()); + // Set to 1 so insert doesn't set it off but update does + conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 1); + initiator.setHiveConf(conf); + AtomicBoolean stop = new AtomicBoolean(); + stop.set(true); + initiator.init(stop, new AtomicBoolean()); + initiator.run(); + + CompactionTxnHandler txnHandler = new CompactionTxnHandler(conf); + ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); + List compacts = rsp.getCompacts(); + Assert.assertEquals(4, compacts.size()); + SortedSet partNames = new TreeSet(); + for (int i = 0; i < compacts.size(); i++) { + Assert.assertEquals("default", compacts.get(i).getDbname()); + Assert.assertEquals(tblName, compacts.get(i).getTablename()); + Assert.assertEquals("initiated", compacts.get(i).getState()); + partNames.add(compacts.get(i).getPartitionname()); + } + List names = new ArrayList(partNames); + Assert.assertEquals("ds=last_century", names.get(0)); + Assert.assertEquals("ds=soon", names.get(1)); + Assert.assertEquals("ds=today", names.get(2)); + Assert.assertEquals("ds=yesterday", names.get(3)); + + // Validate after compaction. + executeStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a", driver); + valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + Assert.assertEquals(5, valuesReadFromHiveDriver.size()); + Assert.assertEquals("1\tbarney\t2000\ttoday", valuesReadFromHiveDriver.get(0)); + Assert.assertEquals("2\tbarney\t2000\tyesterday", valuesReadFromHiveDriver.get(1)); + Assert.assertEquals("3\tmark\t2000\tsoon", valuesReadFromHiveDriver.get(2)); + Assert.assertEquals("4\tdouglas\t2000\tlast_century", valuesReadFromHiveDriver.get(3)); + Assert.assertEquals("5\tdoc\t2000\tyesterday", valuesReadFromHiveDriver.get(4)); + } + /** * After each major compaction, stats need to be updated on each column of the * table/partition which previously had stats. 
@@ -255,7 +442,9 @@ public void testStatsAfterCompactionPartTbl() throws Exception {
     t.run();
     ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
     List<ShowCompactResponseElement> compacts = rsp.getCompacts();
-    Assert.assertEquals(1, compacts.size());
+    if (1 != compacts.size()) {
+      Assert.fail("Expecting 1 compaction and found " + compacts.size() + " compactions " + compacts.toString());
+    }
     Assert.assertEquals("ready for cleaning", compacts.get(0).getState());
     stats = msClient.getPartitionColumnStatistics(ci.dbname, ci.tableName,
@@ -409,6 +598,8 @@ public void minorCompactWhileStreaming() throws Exception {
     String dbName = "default";
     String tblName = "cws";
     List<String> colNames = Arrays.asList("a", "b");
+    String columnNamesProperty = "a,b";
+    String columnTypesProperty = "int:string";
     executeStatementOnDriver("drop table if exists " + tblName, driver);
     executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
         " CLUSTERED BY(a) INTO 1 BUCKETS" + //currently ACID requires table to be bucketed
@@ -452,9 +643,12 @@ public void minorCompactWhileStreaming() throws Exception {
         }
       }
       Arrays.sort(names);
-      Assert.assertArrayEquals(names, new String[]{"delta_0000001_0000002",
-          "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006"});
-      checkExpectedTxnsPresent(null, new Path[]{resultFile}, 0, 1L, 4L);
+      String[] expected = new String[]{"delta_0000001_0000002",
+          "delta_0000001_0000004", "delta_0000003_0000004", "delta_0000005_0000006"};
+      if (!Arrays.deepEquals(expected, names)) {
+        Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names));
+      }
+      checkExpectedTxnsPresent(null, new Path[]{resultFile}, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
     } finally {
       connection.close();
@@ -466,6 +660,8 @@ public void majorCompactWhileStreaming() throws Exception {
     String dbName = "default";
     String tblName = "cws";
     List<String> colNames = Arrays.asList("a", "b");
+    String columnNamesProperty = "a,b";
+    String columnTypesProperty = "int:string";
     executeStatementOnDriver("drop table if exists " + tblName, driver);
     executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
         " CLUSTERED BY(a) INTO 1 BUCKETS" + //currently ACID requires table to be bucketed
@@ -500,10 +696,12 @@ public void majorCompactWhileStreaming() throws Exception {
       FileSystem fs = FileSystem.get(conf);
       FileStatus[] stat =
           fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter);
-      Assert.assertEquals(1, stat.length);
+      if (1 != stat.length) {
+        Assert.fail("Expecting 1 file \"base_0000004\" and found " + stat.length + " files " + Arrays.toString(stat));
+      }
       String name = stat[0].getPath().getName();
       Assert.assertEquals(name, "base_0000004");
-      checkExpectedTxnsPresent(stat[0].getPath(), null, 0, 1L, 4L);
+      checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L);
     } finally {
       connection.close();
     }
@@ -514,6 +712,8 @@ public void minorCompactAfterAbort() throws Exception {
     String dbName = "default";
     String tblName = "cws";
     List<String> colNames = Arrays.asList("a", "b");
+    String columnNamesProperty = "a,b";
+    String columnTypesProperty = "int:string";
     executeStatementOnDriver("drop table if exists " + tblName, driver);
     executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
         " CLUSTERED BY(a) INTO 1 BUCKETS" + //currently ACID requires table to be bucketed
@@ -561,9 +761,12 @@ public void minorCompactAfterAbort() throws Exception {
         }
       }
       Arrays.sort(names);
-      Assert.assertArrayEquals(names, new String[]{"delta_0000001_0000002",
String[]{"delta_0000001_0000002", - "delta_0000001_0000006", "delta_0000003_0000004", "delta_0000005_0000006"}); - checkExpectedTxnsPresent(null, new Path[]{resultDelta}, 0, 1L, 4L); + String[] expected = new String[]{"delta_0000001_0000002", + "delta_0000001_0000006", "delta_0000003_0000004", "delta_0000005_0000006"}; + if (!Arrays.deepEquals(expected, names)) { + Assert.fail("Expected: " + Arrays.toString(expected) + ", found: " + Arrays.toString(names)); + } + checkExpectedTxnsPresent(null, new Path[]{resultDelta}, columnNamesProperty, columnTypesProperty, 0, 1L, 4L); } finally { connection.close(); } @@ -574,6 +777,8 @@ public void majorCompactAfterAbort() throws Exception { String dbName = "default"; String tblName = "cws"; List colNames = Arrays.asList("a", "b"); + String columnNamesProperty = "a,b"; + String columnTypesProperty = "int:string"; executeStatementOnDriver("drop table if exists " + tblName, driver); executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " + " CLUSTERED BY(a) INTO 1 BUCKETS" + //currently ACID requires table to be bucketed @@ -613,10 +818,17 @@ public void majorCompactAfterAbort() throws Exception { FileSystem fs = FileSystem.get(conf); FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()), AcidUtils.baseFileFilter); - Assert.assertEquals(1, stat.length); + if (1 != stat.length) { + Assert.fail("majorCompactAfterAbort FileStatus[] stat " + Arrays.toString(stat)); + } + if (1 != stat.length) { + Assert.fail("Expecting 1 file \"base_0000006\" and found " + stat.length + " files " + Arrays.toString(stat)); + } String name = stat[0].getPath().getName(); - Assert.assertEquals(name, "base_0000006"); - checkExpectedTxnsPresent(stat[0].getPath(), null, 0, 1L, 4L); + if (!name.equals("base_0000006")) { + Assert.fail("majorCompactAfterAbort name " + name + " not equals to base_0000006"); + } + checkExpectedTxnsPresent(stat[0].getPath(), null, columnNamesProperty, columnTypesProperty, 0, 1L, 4L); } finally { connection.close(); } @@ -642,7 +854,8 @@ private void writeBatch(StreamingConnection connection, DelimitedInputWriter wri } } - private void checkExpectedTxnsPresent(Path base, Path[] deltas, int bucket, long min, long max) + private void checkExpectedTxnsPresent(Path base, Path[] deltas, String columnNamesProperty, + String columnTypesProperty, int bucket, long min, long max) throws IOException { ValidTxnList txnList = new ValidTxnList() { @Override @@ -678,8 +891,11 @@ public long getHighWatermark() { OrcInputFormat aif = new OrcInputFormat(); + Configuration conf = new Configuration(); + conf.set("columns", columnNamesProperty); + conf.set("columns.types", columnTypesProperty); AcidInputFormat.RawReader reader = - aif.getRawReader(new Configuration(), false, bucket, txnList, base, deltas); + aif.getRawReader(conf, false, bucket, txnList, base, deltas); RecordIdentifier identifier = reader.createKey(); OrcStruct value = reader.createValue(); long currentTxn = min; diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 1c8a80d..290cff2 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -156,6 +156,22 @@ minitez.query.files.shared=acid_globallimit.q,\ ptf_matchpath.q,\ ptf_streaming.q,\ sample1.q,\ + schema_evol_text_nonvec_mapwork_table.q,\ + schema_evol_text_nonvec_fetchwork_table.q,\ + schema_evol_orc_nonvec_fetchwork_part.q,\ + schema_evol_orc_nonvec_mapwork_part.q,\ + 
+  schema_evol_text_nonvec_fetchwork_part.q,\
+  schema_evol_text_nonvec_mapwork_part.q,\
+  schema_evol_orc_acid_mapwork_part.q,\
+  schema_evol_orc_acid_mapwork_table.q,\
+  schema_evol_orc_acidvec_mapwork_table.q,\
+  schema_evol_orc_acidvec_mapwork_part.q,\
+  schema_evol_orc_vec_mapwork_part.q,\
+  schema_evol_text_fetchwork_table.q,\
+  schema_evol_text_mapwork_table.q,\
+  schema_evol_orc_vec_mapwork_table.q,\
+  schema_evol_orc_nonvec_mapwork_table.q,\
+  schema_evol_orc_nonvec_fetchwork_table.q,\
   selectDistinctStar.q,\
   script_env_var1.q,\
   script_env_var2.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 34461ed..4b4495f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -497,8 +497,13 @@
       + "data, set " + HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA + " to true"),
 
   INVALID_FILE_FORMAT_IN_LOAD(30019, "The file that you are trying to load does not match the" +
-      " file format of the destination table.")
+      " file format of the destination table."),
+
+  SCHEMA_REQUIRED_TO_READ_ACID_TABLES(30020, "Neither the configuration variables " +
+      "schema.evolution.columns / schema.evolution.columns.types " +
+      "nor the " +
+      "columns / columns.types " +
+      "are set. Table schema information is required to read ACID tables")
   ;
 
   private int errorCode;
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
index 258d28e..b6e5739 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
@@ -133,6 +133,10 @@ public FetchOperator(FetchWork work, JobConf job, Operator<?> operator,
     this.job = job;
     this.work = work;
     this.operator = operator;
+    if (operator instanceof TableScanOperator) {
+      Utilities.addTableSchemaToConf(job,
+          (TableScanOperator) operator);
+    }
     this.vcCols = vcCols;
     this.hasVC = vcCols != null && !vcCols.isEmpty();
     this.isStatReader = work.getTblDesc() == null;
@@ -598,6 +602,10 @@ private StructObjectInspector getPartitionedRowOI(StructObjectInspector valueOI)
   }
 
   private boolean needConversion(PartitionDesc partitionDesc) {
+    boolean isAcid = AcidUtils.isTablePropertyTransactional(partitionDesc.getTableDesc().getProperties());
+    if (Utilities.isSchemaEvolutionEnabled(job, isAcid) && Utilities.isInputFileFormatSelfDescribing(partitionDesc)) {
+      return false;
+    }
     return needConversion(partitionDesc.getTableDesc(), Arrays.asList(partitionDesc));
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
index f8717ae..afc03ed 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
@@ -38,8 +38,10 @@
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.MapOperator.MapOpCtx;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.plan.MapWork;
@@ -63,6 +65,7 @@
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.util.StringUtils;
@@ -200,8 +203,14 @@ private MapOpCtx initObjectInspector(Configuration hconf, MapOpCtx opCtx,
     opCtx.partName = String.valueOf(partSpec);
     opCtx.deserializer = pd.getDeserializer(hconf);
 
-    StructObjectInspector partRawRowObjectInspector =
-        (StructObjectInspector) opCtx.deserializer.getObjectInspector();
+    StructObjectInspector partRawRowObjectInspector;
+    boolean isAcid = AcidUtils.isTablePropertyTransactional(td.getProperties());
+    if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid) && Utilities.isInputFileFormatSelfDescribing(pd)) {
+      partRawRowObjectInspector = tableRowOI;
+    } else {
+      partRawRowObjectInspector =
+          (StructObjectInspector) opCtx.deserializer.getObjectInspector();
+    }
 
     opCtx.partTblObjectInspectorConverter =
         ObjectInspectorConverters.getConverter(partRawRowObjectInspector, tableRowOI);
@@ -302,8 +311,16 @@ private MapOpCtx initObjectInspector(Configuration hconf, MapOpCtx opCtx,
       PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile);
       TableDesc tableDesc = pd.getTableDesc();
       Deserializer partDeserializer = pd.getDeserializer(hconf);
-      StructObjectInspector partRawRowObjectInspector =
-          (StructObjectInspector) partDeserializer.getObjectInspector();
+
+      StructObjectInspector partRawRowObjectInspector;
+      boolean isAcid = AcidUtils.isTablePropertyTransactional(tableDesc.getProperties());
+      if (Utilities.isSchemaEvolutionEnabled(hconf, isAcid) && Utilities.isInputFileFormatSelfDescribing(pd)) {
+        Deserializer tblDeserializer = tableDesc.getDeserializer(hconf);
+        partRawRowObjectInspector = (StructObjectInspector) tblDeserializer.getObjectInspector();
+      } else {
+        partRawRowObjectInspector =
+            (StructObjectInspector) partDeserializer.getObjectInspector();
+      }
 
       StructObjectInspector tblRawRowObjectInspector = tableDescOI.get(tableDesc);
       if ((tblRawRowObjectInspector == null) ||
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index cbf02e9..d98ea84 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -70,6 +70,13 @@
 
   private String defaultPartitionName;
 
+  /**
+   * These values are saved during MapWork, FetchWork, etc. preparation and later added to the
+   * JobConf of each task.
+   */
+  private String schemaEvolutionColumns;
+  private String schemaEvolutionColumnsTypes;
+
   public TableDesc getTableDesc() {
     return tableDesc;
   }
@@ -78,6 +85,19 @@ public void setTableDesc(TableDesc tableDesc) {
     this.tableDesc = tableDesc;
   }
 
+  public void setSchemaEvolution(String schemaEvolutionColumns, String schemaEvolutionColumnsTypes) {
+    this.schemaEvolutionColumns = schemaEvolutionColumns;
+    this.schemaEvolutionColumnsTypes = schemaEvolutionColumnsTypes;
+  }
+
+  public String getSchemaEvolutionColumns() {
+    return schemaEvolutionColumns;
+  }
+
+  public String getSchemaEvolutionColumnsTypes() {
+    return schemaEvolutionColumnsTypes;
+  }
+
   /**
    * Other than gathering statistics for the ANALYZE command, the table scan operator
   * does not do anything special other than just forwarding the row.
Since the table diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 3352b49..1d8e3b1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -107,6 +107,7 @@ import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; @@ -122,15 +123,19 @@ import org.apache.hadoop.hive.ql.exec.spark.SparkTask; import org.apache.hadoop.hive.ql.exec.tez.DagUtils; import org.apache.hadoop.hive.ql.exec.tez.TezTask; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.HiveInputFormat; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; +import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat; import org.apache.hadoop.hive.ql.io.RCFile; import org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat; +import org.apache.hadoop.hive.ql.io.SelfDescribingInputFormatInterface; import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper; import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanMapper; @@ -175,6 +180,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SequenceFile; @@ -478,11 +489,6 @@ private static BaseWork getBaseWork(Configuration conf, String name) { } } - public static Map getMapWorkVectorScratchColumnTypeMap(Configuration hiveConf) { - MapWork mapWork = getMapWork(hiveConf); - return mapWork.getVectorScratchColumnTypeMap(); - } - public static void setWorkflowAdjacencies(Configuration conf, QueryPlan plan) { try { Graph stageGraph = plan.getQueryPlan().getStageGraph(); @@ -3901,6 +3907,27 @@ public static boolean isVectorMode(Configuration conf) { return false; } + /** + * @param conf + * @return the configured VectorizedRowBatchCtx for a MapWork task. 
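+   *         Returns null when vectorization is disabled or the MapWork is not vectorized.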
+ */ + public static VectorizedRowBatchCtx getVectorizedRowBatchCtx(Configuration conf) { + VectorizedRowBatchCtx result = null; + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) && + Utilities.getPlanPath(conf) != null) { + MapWork mapWork = Utilities.getMapWork(conf); + if (mapWork != null && mapWork.getVectorMode()) { + result = mapWork.getVectorizedRowBatchCtx(); + } + } + return result; + } + + public static boolean isVectorMode(Configuration conf, MapWork mapWork) { + return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) + && mapWork.getVectorMode(); + } + public static void clearWorkMapForConf(Configuration conf) { // Remove cached query plans for the current query only Path mapPath = getPlanPath(conf, MAP_PLAN_NAME); @@ -4057,4 +4084,77 @@ public static boolean isPerfOrAboveLogging(HiveConf conf) { (loggingLevel.equalsIgnoreCase("PERFORMANCE") || loggingLevel.equalsIgnoreCase("VERBOSE")); } + public static boolean isSchemaEvolutionEnabled(Configuration conf, boolean isAcid) { + return isAcid || HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION); + } + + public static boolean isInputFileFormatSelfDescribing(PartitionDesc pd) { + Class inputFormatClass = pd.getInputFileFormatClass(); + return SelfDescribingInputFormatInterface.class.isAssignableFrom(inputFormatClass); + } + + public static boolean isInputFileFormatVectorized(PartitionDesc pd) { + Class inputFormatClass = pd.getInputFileFormatClass(); + return VectorizedInputFormatInterface.class.isAssignableFrom(inputFormatClass); + } + + public static void addSchemaEvolutionToTableScanOperator(Table table, + TableScanOperator tableScanOp) { + String colNames = MetaStoreUtils.getColumnNamesFromFieldSchema(table.getSd().getCols()); + String colTypes = MetaStoreUtils.getColumnTypesFromFieldSchema(table.getSd().getCols()); + tableScanOp.setSchemaEvolution(colNames, colTypes); + } + + public static void addSchemaEvolutionToTableScanOperator(StructObjectInspector structOI, + TableScanOperator tableScanOp) { + String colNames = ObjectInspectorUtils.getFieldNames(structOI); + String colTypes = ObjectInspectorUtils.getFieldTypes(structOI); + tableScanOp.setSchemaEvolution(colNames, colTypes); + } + + public static void unsetSchemaEvolution(Configuration conf) { + conf.unset(IOConstants.SCHEMA_EVOLUTION_COLUMNS); + conf.unset(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES); + } + + public static void addTableSchemaToConf(Configuration conf, + TableScanOperator tableScanOp) { + String schemaEvolutionColumns = tableScanOp.getSchemaEvolutionColumns(); + if (schemaEvolutionColumns != null) { + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, tableScanOp.getSchemaEvolutionColumns()); + conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, tableScanOp.getSchemaEvolutionColumnsTypes()); + } else { + LOG.info("schema.evolution.columns and schema.evolution.columns.types not available"); + } + } + + /** + * Create row key and value object inspectors for reduce vectorization. + * The row object inspector used by ReduceWork needs to be a **standard** + * struct object inspector, not just any struct object inspector. 
+   * @param keyInspector
+   * @param valueInspector
+   * @return the standard struct row object inspector
+   * @throws HiveException
+   */
+  public static StandardStructObjectInspector constructVectorizedReduceRowOI(
+      StructObjectInspector keyInspector, StructObjectInspector valueInspector)
+      throws HiveException {
+
+    ArrayList<String> colNames = new ArrayList<String>();
+    ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
+    List<? extends StructField> fields = keyInspector.getAllStructFieldRefs();
+    for (StructField field: fields) {
+      colNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
+      ois.add(field.getFieldObjectInspector());
+    }
+    fields = valueInspector.getAllStructFieldRefs();
+    for (StructField field: fields) {
+      colNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName());
+      ois.add(field.getFieldObjectInspector());
+    }
+    StandardStructObjectInspector rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois);
+
+    return rowObjectInspector;
+  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
index ac5e3ca..f9e10c4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
@@ -49,6 +49,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.DataOutputBuffer;
@@ -153,10 +154,6 @@ public void init(JobConf job, OutputCollector output, Reporter reporter) throws
         /* vectorization only works with struct object inspectors */
         valueStructInspectors[tag] = (StructObjectInspector) valueObjectInspector[tag];
 
-        ObjectPair<VectorizedRowBatch, StandardStructObjectInspector> pair = VectorizedBatchUtil.
- constructVectorizedRowBatch(keyStructInspector, - valueStructInspectors[tag], gWork.getVectorScratchColumnTypeMap()); - batches[tag] = pair.getFirst(); final int totalColumns = keysColumnOffset + valueStructInspectors[tag].getAllStructFieldRefs().size(); valueStringWriters[tag] = new ArrayList(totalColumns); @@ -165,7 +162,11 @@ public void init(JobConf job, OutputCollector output, Reporter reporter) throws valueStringWriters[tag].addAll(Arrays.asList(VectorExpressionWriterFactory .genVectorStructExpressionWritables(valueStructInspectors[tag]))); - rowObjectInspector[tag] = pair.getSecond(); + rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, + valueStructInspectors[tag]); + batches[tag] = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch(); + + } else { ois.add(keyObjectInspector); ois.add(valueObjectInspector[tag]); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index d649672..5edd587 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -237,7 +237,7 @@ private void initializeSourceForTag(ReduceWork redWork, int tag, ObjectInspector boolean vectorizedRecordSource = (tag == bigTablePosition) && redWork.getVectorMode(); sources[tag].init(jconf, redWork.getReducer(), vectorizedRecordSource, keyTableDesc, valueTableDesc, reader, tag == bigTablePosition, (byte) tag, - redWork.getVectorScratchColumnTypeMap()); + redWork.getVectorizedRowBatchCtx()); ois[tag] = sources[tag].getObjectInspector(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index b634877..b1d2f52 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; import org.apache.hadoop.hive.ql.log.PerfLogger; @@ -51,7 +52,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; -import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.mapred.JobConf; @@ -123,7 +123,7 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, - Map vectorScratchColumnTypeMap) + VectorizedRowBatchCtx batchContext) throws Exception { ObjectInspector keyObjectInspector; @@ -174,10 +174,9 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT .asList(VectorExpressionWriterFactory .genVectorStructExpressionWritables(valueStructInspectors))); - 
ObjectPair pair = - VectorizedBatchUtil.constructVectorizedRowBatch(keyStructInspector, valueStructInspectors, vectorScratchColumnTypeMap); - rowObjectInspector = pair.getSecond(); - batch = pair.getFirst(); + rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, + valueStructInspectors); + batch = batchContext.createVectorizedRowBatch(); // Setup vectorized deserialization for the key and value. BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer; @@ -185,7 +184,7 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT keyBinarySortableDeserializeToRow = new VectorDeserializeRow( new BinarySortableDeserializeRead( - VectorizedBatchUtil.primitiveTypeInfosFromStructObjectInspector( + VectorizedBatchUtil.typeInfosFromStructObjectInspector( keyStructInspector), binarySortableSerDe.getSortOrders())); keyBinarySortableDeserializeToRow.init(0); @@ -195,7 +194,7 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT valueLazyBinaryDeserializeToRow = new VectorDeserializeRow( new LazyBinaryDeserializeRead( - VectorizedBatchUtil.primitiveTypeInfosFromStructObjectInspector( + VectorizedBatchUtil.typeInfosFromStructObjectInspector( valueStructInspectors))); valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index 8ec7ead..99744cd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -18,11 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.util.Arrays; - -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; /** * This class supports string and binary data by value reference -- i.e. each field is @@ -51,9 +46,6 @@ private byte[] buffer; // optional buffer to use when actually copying in data private int nextFree; // next free position in buffer - // Reusable text object - private final Text textObject = new Text(); - // Estimate that there will be 16 bytes per entry static final int DEFAULT_BUFFER_SIZE = 16 * VectorizedRowBatch.DEFAULT_SIZE; @@ -165,6 +157,19 @@ public void setVal(int elementNum, byte[] sourceBuf, int start, int length) { } /** + * Set a field by actually copying in to a local buffer. + * If you must actually copy data in to the array, use this method. + * DO NOT USE this method unless it's not practical to set data by reference with setRef(). + * Setting data by reference tends to run a lot faster than copying data in. + * + * @param elementNum index within column vector to set + * @param sourceBuf container of source data + */ + public void setVal(int elementNum, byte[] sourceBuf) { + setVal(elementNum, sourceBuf, 0, sourceBuf.length); + } + + /** * Set a field to the concatenation of two string values. Result data is copied * into the internal buffer. 
* @@ -215,22 +220,6 @@ public void increaseBufferSpace(int nextElemLength) { buffer = newBuffer; } - @Override - public Writable getWritableObject(int index) { - if (this.isRepeating) { - index = 0; - } - Writable result = null; - if (!isNull[index] && vector[index] != null) { - textObject.clear(); - textObject.append(vector[index], start[index], length[index]); - result = textObject; - } else { - result = NullWritable.get(); - } - return result; - } - /** Copy the current object contents into the output. Only copy selected entries, * as indicated by selectedInUse and the sel array. */ @@ -294,7 +283,7 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { // Only copy data values if entry is not null. The string value // at position 0 is undefined if the position 0 value is null. - if (noNulls || (!noNulls && !isNull[0])) { + if (noNulls || !isNull[0]) { // loops start at position 1 because position 0 is already set if (selectedInUse) { @@ -320,14 +309,70 @@ public void fill(byte[] value) { setRef(0, value, 0, value.length); } + // Fill the column vector with nulls + public void fillWithNulls() { + noNulls = false; + isRepeating = true; + vector[0] = null; + isNull[0] = true; + } + @Override public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - BytesColumnVector in = (BytesColumnVector) inputVector; - setVal(outElementNum, in.vector[inputElementNum], in.start[inputElementNum], in.length[inputElementNum]); + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + BytesColumnVector in = (BytesColumnVector) inputVector; + setVal(outElementNum, in.vector[inputElementNum], + in.start[inputElementNum], in.length[inputElementNum]); + } else { + isNull[outElementNum] = true; + noNulls = false; + } } @Override public void init() { initBuffer(0); } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('"'); + buffer.append(new String(this.buffer, start[row], length[row])); + buffer.append('"'); + } else { + buffer.append("null"); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + if (size > vector.length) { + super.ensureSize(size, preserveData); + int[] oldStart = start; + start = new int[size]; + int[] oldLength = length; + length = new int[size]; + byte[][] oldVector = vector; + vector = new byte[size][]; + if (preserveData) { + if (isRepeating) { + vector[0] = oldVector[0]; + start[0] = oldStart[0]; + length[0] = oldLength[0]; + } else { + System.arraycopy(oldVector, 0, vector, 0, oldVector.length); + System.arraycopy(oldStart, 0, start, 0 , oldStart.length); + System.arraycopy(oldLength, 0, length, 0, oldLength.length); + } + } + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index 6654166..fcb1ae9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -18,10 +18,9 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.io.IOException; import java.util.Arrays; -import org.apache.hadoop.io.Writable; - /** * ColumnVector contains the shared structure for the sub-types, * including NULL information, and whether this vector @@ -38,10 +37,15 @@ * The current kinds of column vectors. 
  */
   public static enum Type {
+    NONE,    // Useful when the type of column vector has not been determined yet.
     LONG,
     DOUBLE,
     BYTES,
-    DECIMAL
+    DECIMAL,
+    STRUCT,
+    LIST,
+    MAP,
+    UNION
   }
 
   /*
@@ -64,8 +68,6 @@
   private boolean preFlattenIsRepeating;
   private boolean preFlattenNoNulls;
 
-  public abstract Writable getWritableObject(int index);
-
   /**
    * Constructor for super-class ColumnVector. This is not called directly,
    * but used to initialize inherited fields.
@@ -76,28 +78,42 @@ public ColumnVector(int len) {
     isNull = new boolean[len];
     noNulls = true;
     isRepeating = false;
+    preFlattenNoNulls = true;
+    preFlattenIsRepeating = false;
   }
 
   /**
-   * Resets the column to default state
-   *  - fills the isNull array with false
-   *  - sets noNulls to true
-   *  - sets isRepeating to false
-   */
-  public void reset() {
-    if (false == noNulls) {
-      Arrays.fill(isNull, false);
-    }
-    noNulls = true;
-    isRepeating = false;
+   * Resets the column to default state
+   *  - fills the isNull array with false
+   *  - sets noNulls to true
+   *  - sets isRepeating to false
+   */
+  public void reset() {
+    if (!noNulls) {
+      Arrays.fill(isNull, false);
     }
+    noNulls = true;
+    isRepeating = false;
+    preFlattenNoNulls = true;
+    preFlattenIsRepeating = false;
+  }
+
+  /**
+   * Sets the isRepeating flag. Recurses over structs and unions so that the
+   * flags are set correctly.
+   * @param isRepeating
+   */
+  public void setRepeating(boolean isRepeating) {
+    this.isRepeating = isRepeating;
+  }
 
-  abstract public void flatten(boolean selectedInUse, int[] sel, int size);
+  abstract public void flatten(boolean selectedInUse, int[] sel, int size);
 
   // Simplify vector by brute-force flattening noNulls if isRepeating
   // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
   // with many arguments.
-  public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) {
+  protected void flattenRepeatingNulls(boolean selectedInUse, int[] sel,
+      int size) {
 
     boolean nullFillValue;
 
@@ -120,13 +136,13 @@ public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) {
     noNulls = false;
   }
 
-  public void flattenNoNulls(boolean selectedInUse, int[] sel, int size) {
+  protected void flattenNoNulls(boolean selectedInUse, int[] sel,
+      int size) {
     if (noNulls) {
       noNulls = false;
       if (selectedInUse) {
         for (int j = 0; j < size; j++) {
-          int i = sel[j];
-          isNull[i] = false;
+          isNull[sel[j]] = false;
         }
       } else {
         Arrays.fill(isNull, 0, size, false);
@@ -155,8 +171,10 @@ protected void flattenPush() {
 
   /**
    * Set the element in this column vector from the given input vector.
+   * This method can assume that the output does not have isRepeating set.
   */
-  public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector);
+  public abstract void setElement(int outElementNum, int inputElementNum,
+      ColumnVector inputVector);
 
   /**
    * Initialize the column vector. This method can be overridden by specific column vector types.
@@ -166,5 +184,33 @@ protected void flattenPush() {
   public void init() {
     // Do nothing by default
   }
-}
 
+  /**
+   * Ensure the ColumnVector can hold at least size values.
+   * This method is deliberately *not* recursive because the complex types
+   * can easily have more (or less) children than the upper levels.
+   * @param size the new minimum size
+   * @param preserveData should the old data be preserved?
+   */
+  public void ensureSize(int size, boolean preserveData) {
+    if (isNull.length < size) {
+      boolean[] oldArray = isNull;
+      isNull = new boolean[size];
+      if (preserveData && !noNulls) {
+        if (isRepeating) {
+          isNull[0] = oldArray[0];
+        } else {
+          System.arraycopy(oldArray, 0, isNull, 0, oldArray.length);
+        }
+      }
+    }
+  }
+
+  /**
+   * Print the value for this column into the given string builder.
+   * @param buffer the buffer to print into
+   * @param row the id of the row to print
+   */
+  public abstract void stringifyValue(StringBuilder buffer,
+      int row);
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index 5009a42..fe8ad85 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -17,14 +17,10 @@
  */
 package org.apache.hadoop.hive.ql.exec.vector;
 
-
 import java.math.BigInteger;
 
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
 
 public class DecimalColumnVector extends ColumnVector {
 
@@ -39,8 +35,6 @@
   public short scale;
   public short precision;
 
-  private final HiveDecimalWritable writableObj = new HiveDecimalWritable();
-
   public DecimalColumnVector(int precision, int scale) {
     this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
   }
@@ -49,26 +43,31 @@ public DecimalColumnVector(int size, int precision, int scale) {
     super(size);
     this.precision = (short) precision;
     this.scale = (short) scale;
-    final int len = size;
-    vector = new HiveDecimalWritable[len];
-    for (int i = 0; i < len; i++) {
+    vector = new HiveDecimalWritable[size];
+    for (int i = 0; i < size; i++) {
       vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO);
     }
   }
 
-  @Override
-  public Writable getWritableObject(int index) {
-    if (isRepeating) {
-      index = 0;
-    }
-    if (!noNulls && isNull[index]) {
-      return NullWritable.get();
+  // Fill all the vector entries with the provided value
+  public void fill(HiveDecimal value) {
+    noNulls = true;
+    isRepeating = true;
+    if (vector[0] == null) {
+      vector[0] = new HiveDecimalWritable(value);
     } else {
-      writableObj.set(vector[index]);
-      return writableObj;
+      vector[0].set(value);
     }
   }
 
+  // Fill the column vector with nulls
+  public void fillWithNulls() {
+    noNulls = false;
+    isRepeating = true;
+    vector[0] = null;
+    isNull[0] = true;
+  }
+
   @Override
   public void flatten(boolean selectedInUse, int[] sel, int size) {
     // TODO Auto-generated method stub
@@ -76,12 +75,35 @@
 
   @Override
   public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
-    HiveDecimal hiveDec = ((DecimalColumnVector) inputVector).vector[inputElementNum].getHiveDecimal(precision, scale);
-    if (hiveDec == null) {
-      noNulls = false;
+    if (inputVector.isRepeating) {
+      inputElementNum = 0;
+    }
+    if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
+      HiveDecimal hiveDec =
+          ((DecimalColumnVector) inputVector).vector[inputElementNum]
+              .getHiveDecimal(precision, scale);
+      if (hiveDec == null) {
+        isNull[outElementNum] = true;
+        noNulls = false;
+      } else {
+        isNull[outElementNum] = false;
+        vector[outElementNum].set(hiveDec);
+      }
+    } else {
       isNull[outElementNum] = true;
+      noNulls = false;
+    }
} + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append(vector[row].toString()); } else { - vector[outElementNum].set(hiveDec); + buffer.append("null"); } } @@ -110,4 +132,20 @@ public void setNullDataValue(int elementNum) { HiveDecimal minimumNonZeroValue = HiveDecimal.create(BigInteger.ONE, scale); vector[elementNum].set(minimumNonZeroValue); } + + @Override + public void ensureSize(int size, boolean preserveData) { + if (size > vector.length) { + super.ensureSize(size, preserveData); + HiveDecimalWritable[] oldArray = vector; + vector = new HiveDecimalWritable[size]; + if (preserveData) { + // we copy all of the values to avoid creating more objects + System.arraycopy(oldArray, 0, vector, 0 , oldArray.length); + for(int i= oldArray.length; i < vector.length; ++i) { + vector[i] = new HiveDecimalWritable(HiveDecimal.ZERO); + } + } + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index 525b3c5..41dc3e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -17,12 +17,9 @@ */ package org.apache.hadoop.hive.ql.exec.vector; +import java.io.IOException; import java.util.Arrays; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; - /** * This class represents a nullable double precision floating point column vector. * This class will be used for operations on all floating point types (float, double) @@ -36,7 +33,6 @@ */ public class DoubleColumnVector extends ColumnVector { public double[] vector; - private final DoubleWritable writableObj = new DoubleWritable(); public static final double NULL_VALUE = Double.NaN; /** @@ -57,19 +53,6 @@ public DoubleColumnVector(int len) { vector = new double[len]; } - @Override - public Writable getWritableObject(int index) { - if (this.isRepeating) { - index = 0; - } - if (!noNulls && isNull[index]) { - return NullWritable.get(); - } else { - writableObj.set(vector[index]); - return writableObj; - } - } - // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. public void copySelected( @@ -121,6 +104,14 @@ public void fill(double value) { vector[0] = value; } + // Fill the column vector with nulls + public void fillWithNulls() { + noNulls = false; + isRepeating = true; + vector[0] = NULL_VALUE; + isNull[0] = true; + } + // Simplify vector by brute-force flattening noNulls and isRepeating // This can be used to reduce combinatorial explosion of code paths in VectorExpressions // with many arguments. 
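A minimal sketch (the class name is hypothetical; it uses only fields and methods this patch adds or keeps) of the null-handling contract that fillWithNulls() above and the rewritten setElement() in the hunk below establish:

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;

public class DoubleNullContractSketch {
  public static void main(String[] args) {
    DoubleColumnVector src = new DoubleColumnVector(1024);
    src.fillWithNulls();                // repeating null: isRepeating and isNull[0] set
    DoubleColumnVector dst = new DoubleColumnVector(1024);
    dst.setElement(5, 0, src);          // null-ness is copied from the input vector
    System.out.println(dst.isNull[5] + " " + dst.noNulls);  // prints: true false
  }
}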
@@ -144,6 +135,44 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { @Override public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - vector[outElementNum] = ((DoubleColumnVector) inputVector).vector[inputElementNum]; + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + vector[outElementNum] = + ((DoubleColumnVector) inputVector).vector[inputElementNum]; + } else { + isNull[outElementNum] = true; + noNulls = false; + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append(vector[row]); + } else { + buffer.append("null"); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + if (size > vector.length) { + super.ensureSize(size, preserveData); + double[] oldArray = vector; + vector = new double[size]; + if (preserveData) { + if (isRepeating) { + vector[0] = oldArray[0]; + } else { + System.arraycopy(oldArray, 0, vector, 0 , oldArray.length); + } + } + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index f0545fe..0afe5db 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -17,12 +17,9 @@ */ package org.apache.hadoop.hive.ql.exec.vector; +import java.io.IOException; import java.util.Arrays; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; - /** * This class represents a nullable int column vector. * This class will be used for operations on all integer types (tinyint, smallint, int, bigint) @@ -36,7 +33,6 @@ */ public class LongColumnVector extends ColumnVector { public long[] vector; - private final LongWritable writableObj = new LongWritable(); public static final long NULL_VALUE = 1; /** @@ -50,26 +46,13 @@ public LongColumnVector() { /** * Don't use this except for testing purposes. * - * @param len + * @param len the number of rows */ public LongColumnVector(int len) { super(len); vector = new long[len]; } - @Override - public Writable getWritableObject(int index) { - if (this.isRepeating) { - index = 0; - } - if (!noNulls && isNull[index]) { - return NullWritable.get(); - } else { - writableObj.set(vector[index]); - return writableObj; - } - } - // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. public void copySelected( @@ -141,7 +124,9 @@ public void copySelected( } } else { - System.arraycopy(vector, 0, output.vector, 0, size); + for(int i = 0; i < size; ++i) { + output.vector[i] = vector[i]; + } } // Copy nulls over if needed @@ -165,6 +150,14 @@ public void fill(long value) { vector[0] = value; } + // Fill the column vector with nulls + public void fillWithNulls() { + noNulls = false; + isRepeating = true; + vector[0] = NULL_VALUE; + isNull[0] = true; + } + // Simplify vector by brute-force flattening noNulls and isRepeating // This can be used to reduce combinatorial explosion of code paths in VectorExpressions // with many arguments. 
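Editorial note, not part of the patch: the reworked setElement() above (and the LongColumnVector version in the next hunk) now honors the source vector's isRepeating/noNulls/isNull state instead of blindly copying the value, and the new stringifyValue() renders a row as text with the same null handling. A minimal sketch of that contract, assuming the patch is applied; the class name is illustrative only:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class SetElementContractSketch {
  public static void main(String[] args) {
    LongColumnVector src = new LongColumnVector(8);
    LongColumnVector dst = new LongColumnVector(8);

    src.vector[0] = 42L;   // row 0 holds a value
    src.isNull[1] = true;  // row 1 is null
    src.noNulls = false;

    dst.setElement(0, 0, src); // copies 42 and clears dst.isNull[0]
    dst.setElement(1, 1, src); // propagates the null and clears dst.noNulls

    StringBuilder sb = new StringBuilder();
    dst.stringifyValue(sb, 0);
    sb.append(' ');
    dst.stringifyValue(sb, 1);
    System.out.println(sb); // prints: 42 null
  }
}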
@@ -188,6 +181,44 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { @Override public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - vector[outElementNum] = ((LongColumnVector) inputVector).vector[inputElementNum]; + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + vector[outElementNum] = + ((LongColumnVector) inputVector).vector[inputElementNum]; + } else { + isNull[outElementNum] = true; + noNulls = false; + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append(vector[row]); + } else { + buffer.append("null"); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + if (size > vector.length) { + super.ensureSize(size, preserveData); + long[] oldArray = vector; + vector = new long[size]; + if (preserveData) { + if (isRepeating) { + vector[0] = oldArray[0]; + } else { + System.arraycopy(oldArray, 0, vector, 0 , oldArray.length); + } + } + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 8452abd..56cf9ba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -31,15 +31,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hive.common.util.DateUtils; @@ -61,12 +56,12 @@ private Reader[] readersByValue; private Reader[] readersByReference; - private PrimitiveTypeInfo[] primitiveTypeInfos; + private TypeInfo[] typeInfos; public VectorDeserializeRow(DeserializeRead deserializeRead) { this(); this.deserializeRead = deserializeRead; - primitiveTypeInfos = deserializeRead.primitiveTypeInfos(); + typeInfos = deserializeRead.typeInfos(); } @@ -564,7 +559,7 @@ private void addReader(int index, int outputColumn) throws HiveException { Reader readerByValue = null; Reader readerByReference = null; - PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfos[index]; PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); switch (primitiveCategory) { // case VOID: @@ -642,10 +637,10 @@ private void addReader(int index, int outputColumn) throws HiveException { public void init(int[] outputColumns) throws HiveException { - readersByValue = new Reader[primitiveTypeInfos.length]; - readersByReference = new Reader[primitiveTypeInfos.length]; + 
readersByValue = new Reader[typeInfos.length]; + readersByReference = new Reader[typeInfos.length]; - for (int i = 0; i < primitiveTypeInfos.length; i++) { + for (int i = 0; i < typeInfos.length; i++) { int outputColumn = outputColumns[i]; addReader(i, outputColumn); } @@ -653,10 +648,10 @@ public void init(List outputColumns) throws HiveException { - readersByValue = new Reader[primitiveTypeInfos.length]; - readersByReference = new Reader[primitiveTypeInfos.length]; + readersByValue = new Reader[typeInfos.length]; + readersByReference = new Reader[typeInfos.length]; - for (int i = 0; i < primitiveTypeInfos.length; i++) { + for (int i = 0; i < typeInfos.length; i++) { int outputColumn = outputColumns.get(i); addReader(i, outputColumn); } @@ -664,10 +659,10 @@ public void init(List outputColumns) throws HiveException { public void init(int startColumn) throws HiveException { - readersByValue = new Reader[primitiveTypeInfos.length]; - readersByReference = new Reader[primitiveTypeInfos.length]; + readersByValue = new Reader[typeInfos.length]; + readersByReference = new Reader[typeInfos.length]; - for (int i = 0; i < primitiveTypeInfos.length; i++) { + for (int i = 0; i < typeInfos.length; i++) { int outputColumn = startColumn + i; addReader(i, outputColumn); } @@ -709,14 +704,14 @@ public void deserializeByReference(VectorizedRowBatch batch, int batchIndex) thr private void throwMoreDetailedException(IOException e, int index) throws EOFException { StringBuilder sb = new StringBuilder(); - sb.append("Detail: \"" + e.toString() + "\" occured for field " + index + " of " + primitiveTypeInfos.length + " fields ("); - for (int i = 0; i < primitiveTypeInfos.length; i++) { + sb.append("Detail: \"" + e.toString() + "\" occurred for field " + index + " of " + typeInfos.length + " fields ("); + for (int i = 0; i < typeInfos.length; i++) { if (i > 0) { sb.append(", "); } - sb.append(primitiveTypeInfos[i].getPrimitiveCategory().name()); + sb.append(((PrimitiveTypeInfo) typeInfos[i]).getPrimitiveCategory().name()); } sb.append(")"); throw new EOFException(sb.toString()); } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index ee6939d..9774f0c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -468,6 +468,9 @@ Object extract(int batchIndex) { int start = colVector.start[adjustedIndex]; int length = colVector.length[adjustedIndex]; + if (value == null) { + LOG.info("null string entry: batchIndex " + batchIndex + " columnIndex " + columnIndex); + } // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
text.set(value, start, length); @@ -727,9 +730,9 @@ public Object extractRowColumn(int batchIndex, int logicalColumnIndex) { } public void extractRow(int batchIndex, Object[] objects) { - int i = 0; - for (Extractor extracter : extracters) { - objects[i++] = extracter.extract(batchIndex); + for (int i = 0; i < extracters.length; i++) { + Extractor extracter = extracters[i]; + objects[i] = extracter.extract(batchIndex); } } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 39a83e3..fa66964 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -814,7 +814,7 @@ public VectorGroupByOperator() { outputFieldNames, objectInspectors); if (isVectorOutput) { vrbCtx = new VectorizedRowBatchCtx(); - vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) outputObjInspector); + vrbCtx.init((StructObjectInspector) outputObjInspector, vOutContext.getScratchColumnTypeNames()); outputBatch = vrbCtx.createVectorizedRowBatch(); vectorAssignRowSameBatch = new VectorAssignRowSameBatch(); vectorAssignRowSameBatch.init((StructObjectInspector) outputObjInspector, vOutContext.getProjectedColumns()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java index 0baec2c..9920e9c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java @@ -91,7 +91,7 @@ public VectorMapJoinBaseOperator (VectorizationContext vContext, OperatorDesc co Collection> result = super.initializeOp(hconf); vrbCtx = new VectorizedRowBatchCtx(); - vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector); + vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames()); outputBatch = vrbCtx.createVectorizedRowBatch(); @@ -182,4 +182,4 @@ protected void reProcessBigTable(int partitionId) public VectorizationContext getOuputVectorizationContext() { return vOutContext; } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index 804ba17..66190ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -146,7 +146,7 @@ public VectorSMBMapJoinOperator(VectorizationContext vContext, OperatorDesc conf Collection> result = super.initializeOp(hconf); vrbCtx = new VectorizedRowBatchCtx(); - vrbCtx.init(vOutContext.getScratchColumnTypeMap(), (StructObjectInspector) this.outputObjInspector); + vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames()); outputBatch = vrbCtx.createVectorizedRowBatch(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java index 342bf67..5586944 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java @@ -22,8 
+22,6 @@ import java.sql.Timestamp; import java.util.List; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -604,13 +602,13 @@ public void init(List typeNames) throws HiveException { } } - public void init(PrimitiveTypeInfo[] primitiveTypeInfos, List columnMap) + public void init(TypeInfo[] typeInfos, int[] columnMap) throws HiveException { - writers = new Writer[primitiveTypeInfos.length]; - for (int i = 0; i < primitiveTypeInfos.length; i++) { - int columnIndex = columnMap.get(i); - Writer writer = createWriter(primitiveTypeInfos[i], columnIndex); + writers = new Writer[typeInfos.length]; + for (int i = 0; i < typeInfos.length; i++) { + int columnIndex = columnMap[i]; + Writer writer = createWriter(typeInfos[i], columnIndex); writers[i] = writer; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index da89e38..ea03099 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -145,6 +145,8 @@ VectorExpressionDescriptor vMap; + private List initialColumnNames; + private List projectedColumns; private List projectionColumnNames; private Map projectionColumnMap; @@ -158,7 +160,11 @@ public VectorizationContext(String contextName, List initialColumnNames) { this.contextName = contextName; level = 0; - LOG.info("VectorizationContext consructor contextName " + contextName + " level " + level + " initialColumnNames " + initialColumnNames.toString()); + if (LOG.isDebugEnabled()) { + LOG.debug("VectorizationContext constructor contextName " + contextName + " level " + + level + " initialColumnNames " + initialColumnNames); + } + this.initialColumnNames = initialColumnNames; this.projectionColumnNames = initialColumnNames; projectedColumns = new ArrayList(); @@ -178,8 +184,11 @@ public VectorizationContext(String contextName, List initialColumnNames) public VectorizationContext(String contextName) { this.contextName = contextName; level = 0; - LOG.info("VectorizationContext consructor contextName " + contextName + " level " + level); - projectedColumns = new ArrayList(); + if (LOG.isDebugEnabled()) { + LOG.debug("VectorizationContext constructor contextName " + contextName + " level " + level); + } + initialColumnNames = new ArrayList(); + projectedColumns = new ArrayList(); projectionColumnNames = new ArrayList(); projectionColumnMap = new HashMap(); this.ocm = new OutputColumnManager(0); @@ -194,6 +203,7 @@ public VectorizationContext(String contextName, VectorizationContext vContext) { this.contextName = contextName; level = vContext.level + 1; LOG.info("VectorizationContext consructor reference contextName " + contextName + " level " + level); + this.initialColumnNames = vContext.initialColumnNames; this.projectedColumns = new ArrayList(); this.projectionColumnNames = new ArrayList(); this.projectionColumnMap = new HashMap(); @@ -206,6 +216,7 @@ public VectorizationContext(String contextName, VectorizationContext vContext) { // Add an initial column to a vectorization context when // a vectorized row batch is being created.
public void addInitialColumn(String columnName) { + initialColumnNames.add(columnName); int index = projectedColumns.size(); projectedColumns.add(index); projectionColumnNames.add(columnName); @@ -234,6 +245,10 @@ public void addProjectionColumn(String columnName, int vectorBatchColIndex) { projectionColumnMap.put(columnName, vectorBatchColIndex); } + public List getInitialColumnNames() { + return initialColumnNames; + } + public List getProjectedColumns() { return projectedColumns; } @@ -2303,36 +2318,51 @@ public static String mapTypeNameSynonyms(String typeName) { } public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) throws HiveException { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - - switch (primitiveCategory) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case DATE: - case TIMESTAMP: - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: - return ColumnVector.Type.LONG; - - case FLOAT: - case DOUBLE: - return ColumnVector.Type.DOUBLE; - - case STRING: - case CHAR: - case VARCHAR: - case BINARY: - return ColumnVector.Type.BYTES; - - case DECIMAL: - return ColumnVector.Type.DECIMAL; - - default: - throw new HiveException("Unexpected primitive type category " + primitiveCategory); + switch (typeInfo.getCategory()) { + case STRUCT: + return Type.STRUCT; + case UNION: + return Type.UNION; + case LIST: + return Type.LIST; + case MAP: + return Type.MAP; + case PRIMITIVE: { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + + switch (primitiveCategory) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case DATE: + case TIMESTAMP: + case INTERVAL_YEAR_MONTH: + case INTERVAL_DAY_TIME: + return ColumnVector.Type.LONG; + + case FLOAT: + case DOUBLE: + return ColumnVector.Type.DOUBLE; + + case STRING: + case CHAR: + case VARCHAR: + case BINARY: + return ColumnVector.Type.BYTES; + + case DECIMAL: + return ColumnVector.Type.DECIMAL; + + default: + throw new RuntimeException("Unexpected primitive type category " + primitiveCategory); + } + } + default: + throw new RuntimeException("Unexpected type category " + + typeInfo.getCategory()); } } @@ -2442,13 +2472,16 @@ public int firstOutputColumnIndex() { return firstOutputColumnIndex; } - public Map getScratchColumnTypeMap() { - Map map = new HashMap(); + public String[] getScratchColumnTypeNames() { + String[] result = new String[ocm.outputColCount]; for (int i = 0; i < ocm.outputColCount; i++) { - String type = ocm.outputColumnsTypes[i]; - map.put(i+this.firstOutputColumnIndex, type); + String typeName = ocm.outputColumnsTypes[i]; + if (typeName.equalsIgnoreCase("long")) { + typeName = "bigint"; // Convert our synonym to a real Hive type name. 
+ } + result[i] = typeName; } - return map; + return result; } @@ -2468,9 +2501,7 @@ public int compare(Integer o1, Integer o2) { } sb.append("sorted projectionColumnMap ").append(sortedColumnMap).append(", "); - Map sortedScratchColumnTypeMap = new TreeMap(comparerInteger); - sortedScratchColumnTypeMap.putAll(getScratchColumnTypeMap()); - sb.append("sorted scratchColumnTypeMap ").append(sortedScratchColumnTypeMap); + sb.append("scratchColumnTypeNames ").append(java.util.Arrays.toString(getScratchColumnTypeNames())); return sb.toString(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 3d7e4f0..b7e13dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -53,9 +53,13 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DataOutputBuffer; @@ -111,137 +115,55 @@ public static void setBatchSize(VectorizedRowBatch batch, int size) { batch.size = size; } - /** - * Walk through the object inspector and add column vectors - * - * @param oi - * @param cvList - * ColumnVectors are populated in this list - */ - private static void allocateColumnVector(StructObjectInspector oi, - List cvList) throws HiveException { - if (cvList == null) { - throw new HiveException("Null columnvector list"); - } - if (oi == null) { - return; - } - final List fields = oi.getAllStructFieldRefs(); - for(StructField field : fields) { - ObjectInspector fieldObjectInspector = field.getFieldObjectInspector(); - switch(fieldObjectInspector.getCategory()) { - case PRIMITIVE: - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) fieldObjectInspector; - switch(poi.getPrimitiveCategory()) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case TIMESTAMP: - case DATE: - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: - cvList.add(new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); - break; - case FLOAT: - case DOUBLE: - cvList.add(new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); - break; - case BINARY: - case STRING: - case CHAR: - case VARCHAR: - cvList.add(new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); - break; - case DECIMAL: - DecimalTypeInfo tInfo = (DecimalTypeInfo) poi.getTypeInfo(); - cvList.add(new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, - tInfo.precision(), tInfo.scale())); - break; - default: - throw new HiveException("Vectorizaton is not supported for datatype:" - + poi.getPrimitiveCategory()); - } - break; - case STRUCT: - throw new HiveException("Struct not supported"); - default: - throw new HiveException("Flattening is not supported for datatype:" - + fieldObjectInspector.getCategory()); - } - } - } - - - /** - * Create VectorizedRowBatch
from ObjectInspector - * - * @param oi - * @return - * @throws HiveException - */ - public static VectorizedRowBatch constructVectorizedRowBatch( - StructObjectInspector oi) throws HiveException { - final List cvList = new LinkedList(); - allocateColumnVector(oi, cvList); - final VectorizedRowBatch result = new VectorizedRowBatch(cvList.size()); - int i = 0; - for(ColumnVector cv : cvList) { - result.cols[i++] = cv; - } - return result; - } + public static ColumnVector createColumnVector(String typeName) { + typeName = typeName.toLowerCase(); - /** - * Create VectorizedRowBatch from key and value object inspectors - * The row object inspector used by ReduceWork needs to be a **standard** - * struct object inspector, not just any struct object inspector. - * @param keyInspector - * @param valueInspector - * @param vectorScratchColumnTypeMap - * @return VectorizedRowBatch, OI - * @throws HiveException - */ - public static ObjectPair constructVectorizedRowBatch( - StructObjectInspector keyInspector, StructObjectInspector valueInspector, Map vectorScratchColumnTypeMap) - throws HiveException { - - ArrayList colNames = new ArrayList(); - ArrayList ois = new ArrayList(); - List fields = keyInspector.getAllStructFieldRefs(); - for (StructField field: fields) { - colNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName()); - ois.add(field.getFieldObjectInspector()); + // Allow undecorated CHAR and VARCHAR to support scratch column type names. + if (typeName.equals("char") || typeName.equals("varchar")) { + return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); } - fields = valueInspector.getAllStructFieldRefs(); - for (StructField field: fields) { - colNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName()); - ois.add(field.getFieldObjectInspector()); - } - StandardStructObjectInspector rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, ois); - VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); - batchContext.init(vectorScratchColumnTypeMap, rowObjectInspector); - return new ObjectPair<>(batchContext.createVectorizedRowBatch(), rowObjectInspector); + TypeInfo typeInfo = (TypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + return createColumnVector(typeInfo); } - /** - * Iterates through all columns in a given row and populates the batch - * - * @param row - * @param oi - * @param rowIndex - * @param batch - * @param buffer - * @throws HiveException - */ - public static void addRowToBatch(Object row, StructObjectInspector oi, - int rowIndex, - VectorizedRowBatch batch, - DataOutputBuffer buffer - ) throws HiveException { - addRowToBatchFrom(row, oi, rowIndex, 0, batch, buffer); + public static ColumnVector createColumnVector(TypeInfo typeInfo) { + switch(typeInfo.getCategory()) { + case PRIMITIVE: + { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + switch(primitiveTypeInfo.getPrimitiveCategory()) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case TIMESTAMP: + case DATE: + case INTERVAL_YEAR_MONTH: + case INTERVAL_DAY_TIME: + return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case FLOAT: + case DOUBLE: + return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case BINARY: + case STRING: + case CHAR: + case VARCHAR: + return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case DECIMAL: + DecimalTypeInfo tInfo = (DecimalTypeInfo) primitiveTypeInfo; + return new 
DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + tInfo.precision(), tInfo.scale()); + default: + throw new RuntimeException("Vectorization is not supported for datatype:" + + primitiveTypeInfo.getPrimitiveCategory()); + } + } + default: + throw new RuntimeException("Vectorization is not supported for datatype:" + + typeInfo.getCategory()); + } } /** @@ -584,31 +506,30 @@ public static StandardStructObjectInspector convertToStandardStructObjectInspect return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames,oids); } - public static PrimitiveTypeInfo[] primitiveTypeInfosFromStructObjectInspector( + public static String[] columnNamesFromStructObjectInspector( StructObjectInspector structObjectInspector) throws HiveException { List fields = structObjectInspector.getAllStructFieldRefs(); - PrimitiveTypeInfo[] result = new PrimitiveTypeInfo[fields.size()]; + String[] result = new String[fields.size()]; int i = 0; for(StructField field : fields) { - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( - field.getFieldObjectInspector().getTypeName()); - result[i++] = (PrimitiveTypeInfo) typeInfo; + result[i++] = field.getFieldName(); } return result; } - public static PrimitiveTypeInfo[] primitiveTypeInfosFromTypeNames( - String[] typeNames) throws HiveException { - - PrimitiveTypeInfo[] result = new PrimitiveTypeInfo[typeNames.length]; + public static TypeInfo[] typeInfosFromTypeNames(String[] typeNames) throws HiveException { + ArrayList typeInfoList = + TypeInfoUtils.typeInfosFromTypeNames(Arrays.asList(typeNames)); + return typeInfoList.toArray(new TypeInfo[0]); + } - for(int i = 0; i < typeNames.length; i++) { - TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames[i]); - result[i] = (PrimitiveTypeInfo) typeInfo; - } - return result; + public static TypeInfo[] typeInfosFromStructObjectInspector( + StructObjectInspector structObjectInspector) { + ArrayList typeInfoList = + TypeInfoUtils.typeInfosFromStructObjectInspector(structObjectInspector); + return typeInfoList.toArray(new TypeInfo[0]); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java deleted file mode 100644 index 5ce7553..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java +++ /dev/null @@ -1,277 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import java.nio.ByteBuffer; -import java.sql.Timestamp; -import java.util.List; - -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeStats; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.lazy.LazyDate; -import org.apache.hadoop.hive.serde2.lazy.LazyLong; -import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp; -import org.apache.hadoop.hive.serde2.lazy.LazyUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.io.ObjectWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; - -/** - * VectorizedColumnarSerDe is used by Vectorized query execution engine - * for columnar based storage supported by RCFile. - */ -public class VectorizedColumnarSerDe extends ColumnarSerDe implements VectorizedSerde { - - public VectorizedColumnarSerDe() throws SerDeException { - } - - private final BytesRefArrayWritable[] byteRefArray = new BytesRefArrayWritable[VectorizedRowBatch.DEFAULT_SIZE]; - private final ObjectWritable ow = new ObjectWritable(); - private final ByteStream.Output serializeVectorStream = new ByteStream.Output(); - - /** - * Serialize a vectorized row batch - * - * @param vrg - * Vectorized row batch to serialize - * @param objInspector - * The ObjectInspector for the row object - * @return The serialized Writable object - * @throws SerDeException - * @see SerDe#serialize(Object, ObjectInspector) - */ - @Override - public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspector) - throws SerDeException { - try { - // Validate that the OI is of struct type - if (objInspector.getCategory() != Category.STRUCT) { - throw new UnsupportedOperationException(getClass().toString() - + " can only serialize struct types, but we got: " - + objInspector.getTypeName()); - } - - VectorizedRowBatch batch = (VectorizedRowBatch) vrg; - StructObjectInspector soi = (StructObjectInspector) objInspector; - List fields = soi.getAllStructFieldRefs(); - - // Reset the byte buffer - serializeVectorStream.reset(); - int count = 0; - int rowIndex = 0; - for (int i = 0; i < batch.size; i++) { - - // If selectedInUse is true then we need to serialize only - // the selected indexes - if (batch.selectedInUse) { - rowIndex = batch.selected[i]; - } else { - rowIndex = i; - } - - BytesRefArrayWritable byteRow = byteRefArray[i]; - int numCols = fields.size(); - - if (byteRow == null) { - byteRow = new BytesRefArrayWritable(numCols); - byteRefArray[i] = byteRow; - } - - byteRow.resetValid(numCols); - - for (int p = 0; p < batch.projectionSize; p++) { - int k = batch.projectedColumns[p]; - ObjectInspector foi = fields.get(k).getFieldObjectInspector(); - ColumnVector currentColVector = batch.cols[k]; - 
- switch (foi.getCategory()) { - case PRIMITIVE: { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi; - if (!currentColVector.noNulls - && (currentColVector.isRepeating || currentColVector.isNull[rowIndex])) { - // The column is null hence write null value - serializeVectorStream.write(new byte[0], 0, 0); - } else { - // If here then the vector value is not null. - if (currentColVector.isRepeating) { - // If the vector has repeating values then set rowindex to zero - rowIndex = 0; - } - - switch (poi.getPrimitiveCategory()) { - case BOOLEAN: { - LongColumnVector lcv = (LongColumnVector) batch.cols[k]; - // In vectorization true is stored as 1 and false as 0 - boolean b = lcv.vector[rowIndex] == 1 ? true : false; - if (b) { - serializeVectorStream.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length); - } else { - serializeVectorStream.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length); - } - } - break; - case BYTE: - case SHORT: - case INT: - case LONG: - LongColumnVector lcv = (LongColumnVector) batch.cols[k]; - LazyLong.writeUTF8(serializeVectorStream, lcv.vector[rowIndex]); - break; - case FLOAT: - case DOUBLE: - DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[k]; - ByteBuffer b = Text.encode(String.valueOf(dcv.vector[rowIndex])); - serializeVectorStream.write(b.array(), 0, b.limit()); - break; - case BINARY: { - BytesColumnVector bcv = (BytesColumnVector) batch.cols[k]; - byte[] bytes = bcv.vector[rowIndex]; - serializeVectorStream.write(bytes, 0, bytes.length); - } - break; - case STRING: - case CHAR: - case VARCHAR: { - // Is it correct to escape CHAR and VARCHAR? - BytesColumnVector bcv = (BytesColumnVector) batch.cols[k]; - LazyUtils.writeEscaped(serializeVectorStream, bcv.vector[rowIndex], - bcv.start[rowIndex], - bcv.length[rowIndex], - serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams - .getNeedsEscape()); - } - break; - case TIMESTAMP: - LongColumnVector tcv = (LongColumnVector) batch.cols[k]; - long timeInNanoSec = tcv.vector[rowIndex]; - Timestamp t = new Timestamp(0); - TimestampUtils.assignTimeInNanoSec(timeInNanoSec, t); - TimestampWritable tw = new TimestampWritable(); - tw.set(t); - LazyTimestamp.writeUTF8(serializeVectorStream, tw); - break; - case DATE: - LongColumnVector dacv = (LongColumnVector) batch.cols[k]; - DateWritable daw = new DateWritable((int) dacv.vector[rowIndex]); - LazyDate.writeUTF8(serializeVectorStream, daw); - break; - default: - throw new UnsupportedOperationException( - "Vectorizaton is not supported for datatype:" - + poi.getPrimitiveCategory()); - } - } - break; - } - case LIST: - case MAP: - case STRUCT: - case UNION: - throw new UnsupportedOperationException("Vectorizaton is not supported for datatype:" - + foi.getCategory()); - default: - throw new SerDeException("Unknown ObjectInspector category!"); - - } - - byteRow.get(k).set(serializeVectorStream.getData(), count, serializeVectorStream - .getLength() - count); - count = serializeVectorStream.getLength(); - } - - } - ow.set(byteRefArray); - } catch (Exception e) { - throw new SerDeException(e); - } - return ow; - } - - @Override - public SerDeStats getSerDeStats() { - return null; - } - - @Override - public Class getSerializedClass() { - return BytesRefArrayWritable.class; - } - - @Override - public Object deserialize(Writable blob) throws SerDeException { - - // Ideally this should throw UnsupportedOperationException as the serde is - // vectorized serde. 
But since RC file reader does not support vectorized reading this - // is left as it is. This function will be called from VectorizedRowBatchCtx::addRowToBatch - // to deserialize the row one by one and populate the batch. Once RC file reader supports vectorized - // reading this serde and be standalone serde with no dependency on ColumnarSerDe. - return super.deserialize(blob); - } - - @Override - public ObjectInspector getObjectInspector() throws SerDeException { - return cachedObjectInspector; - } - - @Override - public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { - throw new UnsupportedOperationException(); - } - - /** - * Deserializes the rowBlob into Vectorized row batch - * @param rowBlob - * rowBlob row batch to deserialize - * @param rowsInBlob - * Total number of rows in rowBlob to deserialize - * @param reuseBatch - * VectorizedRowBatch to which the rows should be serialized * - * @throws SerDeException - */ - @Override - public void deserializeVector(Object rowBlob, int rowsInBlob, - VectorizedRowBatch reuseBatch) throws SerDeException { - - BytesRefArrayWritable[] refArray = (BytesRefArrayWritable[]) rowBlob; - DataOutputBuffer buffer = new DataOutputBuffer(); - for (int i = 0; i < rowsInBlob; i++) { - Object row = deserialize(refArray[i]); - try { - VectorizedBatchUtil.addRowToBatch(row, - (StructObjectInspector) cachedObjectInspector, i, - reuseBatch, buffer); - } catch (HiveException e) { - throw new SerDeException(e); - } - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index 7e41384..2882024 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -45,6 +45,10 @@ public int[] projectedColumns; public int projectionSize; + private int dataColumnCount; + private int partitionColumnCount; + + /* * If no filtering has been applied yet, selectedInUse is false, * meaning that all rows qualify. 
If it is true, then the selected[] array @@ -94,6 +98,22 @@ public VectorizedRowBatch(int numCols, int size) { for (int i = 0; i < numCols; i++) { projectedColumns[i] = i; } + + dataColumnCount = -1; + partitionColumnCount = -1; + } + + public void setPartitionInfo(int dataColumnCount, int partitionColumnCount) { + this.dataColumnCount = dataColumnCount; + this.partitionColumnCount = partitionColumnCount; + } + + public int getDataColumnCount() { + return dataColumnCount; + } + + public int getPartitionColumnCount() { + return partitionColumnCount; } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 81ab129..efb06b2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -20,16 +20,10 @@ import java.io.IOException; import java.sql.Date; import java.sql.Timestamp; -import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -38,392 +32,270 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.FileSplit; import org.apache.hive.common.util.DateUtils; /** - * Context for Vectorized row batch. 
this calss does eager deserialization of row data using serde + * Context for Vectorized row batch. this class does eager deserialization of row data using serde * in the RecordReader layer. * It has supports partitions in this layer so that the vectorized batch is populated correctly * with the partition column. */ public class VectorizedRowBatchCtx { + private static final long serialVersionUID = 1L; + private static final Log LOG = LogFactory.getLog(VectorizedRowBatchCtx.class.getName()); - // OI for raw row data (EG without partition cols) - private StructObjectInspector rawRowOI; + // The following information is for creating VectorizedRowBatch and for helping with + // knowing how the table is partitioned. + // + // It will be stored in MapWork and ReduceWork. + private String[] rowColumnNames; + private TypeInfo[] rowColumnTypeInfos; + private int dataColumnCount; + private int partitionColumnCount; - // OI for the row (Raw row OI + partition OI) - private StructObjectInspector rowOI; + private String[] scratchColumnTypeNames; - // Deserializer for the row data - private Deserializer deserializer; + /** + * Constructor for VectorizedRowBatchCtx + */ + public VectorizedRowBatchCtx() { + } - // Hash map of partition values. Key=TblColName value=PartitionValue - private Map partitionValues; - - //partition types - private Map partitionTypes; + public VectorizedRowBatchCtx(String[] rowColumnNames, TypeInfo[] rowColumnTypeInfos, + int partitionColumnCount, String[] scratchColumnTypeNames) { + this.rowColumnNames = rowColumnNames; + this.rowColumnTypeInfos = rowColumnTypeInfos; + this.partitionColumnCount = partitionColumnCount; + this.scratchColumnTypeNames = scratchColumnTypeNames; - // partition column positions, for use by classes that need to know whether a given column is a - // partition column - private Set partitionCols; - - // Column projection list - List of column indexes to include. This - // list does not contain partition columns - private List colsToInclude; + dataColumnCount = rowColumnTypeInfos.length - partitionColumnCount; + } - private Map scratchColumnTypeMap = null; + public String[] getRowColumnNames() { + return rowColumnNames; + } - /** - * Constructor for VectorizedRowBatchCtx - * - * @param rawRowOI - * OI for raw row data (EG without partition cols) - * @param rowOI - * OI for the row (Raw row OI + partition OI) - * @param deserializer - * Deserializer for the row data - * @param partitionValues - * Hash map of partition values. Key=TblColName value=PartitionValue - */ - public VectorizedRowBatchCtx(StructObjectInspector rawRowOI, StructObjectInspector rowOI, - Deserializer deserializer, Map partitionValues, - Map partitionTypes) { - this.rowOI = rowOI; - this.rawRowOI = rawRowOI; - this.deserializer = deserializer; - this.partitionValues = partitionValues; - this.partitionTypes = partitionTypes; + public TypeInfo[] getRowColumnTypeInfos() { + return rowColumnTypeInfos; } - /** - * Constructor for VectorizedRowBatchCtx - */ - public VectorizedRowBatchCtx() { + public int getDataColumnCount() { + return dataColumnCount; + } + public int getPartitionColumnCount() { + return partitionColumnCount; + } + + public String[] getScratchColumnTypeNames() { + return scratchColumnTypeNames; } /** - * Initializes the VectorizedRowBatch context based on an scratch column type map and + * Initializes the VectorizedRowBatch context based on an scratch column type names and * object inspector. 
- * @param scratchColumnTypeMap - * @param rowOI + * @param structObjectInspector + * @param scratchColumnTypeNames * Object inspector that shapes the column types + * @throws HiveException */ - public void init(Map scratchColumnTypeMap, - StructObjectInspector rowOI) { - this.scratchColumnTypeMap = scratchColumnTypeMap; - this.rowOI= rowOI; - this.rawRowOI = rowOI; + public void init(StructObjectInspector structObjectInspector, String[] scratchColumnTypeNames) + throws HiveException { + + // Row column information. + rowColumnNames = VectorizedBatchUtil.columnNamesFromStructObjectInspector(structObjectInspector); + rowColumnTypeInfos = VectorizedBatchUtil.typeInfosFromStructObjectInspector(structObjectInspector); + partitionColumnCount = 0; + dataColumnCount = rowColumnTypeInfos.length; + + // Scratch column information. + this.scratchColumnTypeNames = scratchColumnTypeNames; } - /** - * Initializes VectorizedRowBatch context based on the - * split and Hive configuration (Job conf with hive Plan). - * - * @param hiveConf - * Hive configuration using Hive plan is extracted - * @param split - * File split of the file being read - * @throws ClassNotFoundException - * @throws IOException - * @throws SerDeException - * @throws InstantiationException - * @throws IllegalAccessException - * @throws HiveException - */ - public void init(Configuration hiveConf, FileSplit split) throws ClassNotFoundException, - IOException, - SerDeException, - InstantiationException, - IllegalAccessException, - HiveException { + public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, Configuration hiveConf, + FileSplit split, Object[] partitionValues) throws IOException { Map pathToPartitionInfo = Utilities .getMapWork(hiveConf).getPathToPartitionInfo(); - PartitionDesc part = HiveFileFormatUtils + PartitionDesc partDesc = HiveFileFormatUtils .getPartitionDescFromPathRecursively(pathToPartitionInfo, split.getPath(), IOPrepareCache.get().getPartitionDescMap()); - String partitionPath = split.getPath().getParent().toString(); - scratchColumnTypeMap = Utilities.getMapWorkVectorScratchColumnTypeMap(hiveConf); - // LOG.info("VectorizedRowBatchCtx init scratchColumnTypeMap " + scratchColumnTypeMap.toString()); - - Properties partProps = - (part.getPartSpec() == null || part.getPartSpec().isEmpty()) ? - part.getTableDesc().getProperties() : part.getProperties(); - - Class serdeclass = hiveConf.getClassByName(part.getSerdeClassName()); - Deserializer partDeserializer = (Deserializer) serdeclass.newInstance(); - SerDeUtils.initializeSerDe(partDeserializer, hiveConf, part.getTableDesc().getProperties(), - partProps); - StructObjectInspector partRawRowObjectInspector = (StructObjectInspector) partDeserializer - .getObjectInspector(); - - deserializer = partDeserializer; - - // Check to see if this split is part of a partition of a table - String pcols = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS); - - String[] partKeys = null; - if (pcols != null && pcols.length() > 0) { - - // Partitions exist for this table. 
Get the partition object inspector and - // raw row object inspector (row with out partition col) - LinkedHashMap partSpec = part.getPartSpec(); - partKeys = pcols.trim().split("/"); - String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES); - String[] partKeyTypes = pcolTypes.trim().split(":"); - - if (partKeys.length > partKeyTypes.length) { - throw new HiveException("Internal error : partKeys length, " +partKeys.length + - " greater than partKeyTypes length, " + partKeyTypes.length); - } - - List partNames = new ArrayList(partKeys.length); - List partObjectInspectors = new ArrayList(partKeys.length); - partitionValues = new LinkedHashMap(); - partitionTypes = new LinkedHashMap(); - for (int i = 0; i < partKeys.length; i++) { - String key = partKeys[i]; - partNames.add(key); - ObjectInspector objectInspector = null; - Object objectVal; - if (partSpec == null) { - // for partitionless table, initialize partValue to empty string. - // We can have partitionless table even if we have partition keys - // when there is only only partition selected and the partition key is not - // part of the projection/include list. - objectVal = null; - objectInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector; - partitionTypes.put(key, PrimitiveCategory.STRING); - } else { - // Create a Standard java object Inspector - PrimitiveTypeInfo partColTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]); - objectInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( - partColTypeInfo); - objectVal = - ObjectInspectorConverters. - getConverter(PrimitiveObjectInspectorFactory. - javaStringObjectInspector, objectInspector). - convert(partSpec.get(key)); - if (partColTypeInfo instanceof CharTypeInfo) { - objectVal = ((HiveChar) objectVal).getStrippedValue(); - } - partitionTypes.put(key, partColTypeInfo.getPrimitiveCategory()); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Partition column: name: " + key + ", value: " + objectVal + ", type: " + partitionTypes.get(key)); - } - partitionValues.put(key, objectVal); - partObjectInspectors.add(objectInspector); - } - - // Create partition OI - StructObjectInspector partObjectInspector = ObjectInspectorFactory - .getStandardStructObjectInspector(partNames, partObjectInspectors); - - // Get row OI from partition OI and raw row OI - StructObjectInspector rowObjectInspector = ObjectInspectorFactory - .getUnionStructObjectInspector(Arrays - .asList(new StructObjectInspector[] {partRawRowObjectInspector, partObjectInspector})); - rowOI = rowObjectInspector; - rawRowOI = partRawRowObjectInspector; - - // We have to do this after we've set rowOI, as getColIndexBasedOnColName uses it - partitionCols = new HashSet(); - if (pcols != null && pcols.length() > 0) { - for (int i = 0; i < partKeys.length; i++) { - partitionCols.add(getColIndexBasedOnColName(partKeys[i])); - } - } + getPartitionValues(vrbCtx, partDesc, partitionValues); - } else { + } - // No partitions for this table, hence row OI equals raw row OI - rowOI = partRawRowObjectInspector; - rawRowOI = partRawRowObjectInspector; + public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, PartitionDesc partDesc, + Object[] partitionValues) { + + LinkedHashMap partSpec = partDesc.getPartSpec(); + + for (int i = 0; i < vrbCtx.partitionColumnCount; i++) { + Object objectValue; + if (partSpec == null) { + // For partition-less table, initialize partValue to empty string. 
+ // We can have a partition-less table even if we have partition keys + // when there is only one partition selected and the partition key is not + // part of the projection/include list. + objectValue = null; + } else { + String key = vrbCtx.rowColumnNames[vrbCtx.dataColumnCount + i]; + + // Create a Standard java object Inspector + ObjectInspector objectInspector = + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( + vrbCtx.rowColumnTypeInfos[vrbCtx.dataColumnCount + i]); + objectValue = + ObjectInspectorConverters. + getConverter(PrimitiveObjectInspectorFactory. + javaStringObjectInspector, objectInspector). + convert(partSpec.get(key)); + } + partitionValues[i] = objectValue; } - - colsToInclude = ColumnProjectionUtils.getReadColumnIDs(hiveConf); } - + /** * Creates a Vectorized row batch and the column vectors. * * @return VectorizedRowBatch * @throws HiveException */ - public VectorizedRowBatch createVectorizedRowBatch() throws HiveException + public VectorizedRowBatch createVectorizedRowBatch() { - List fieldRefs = rowOI.getAllStructFieldRefs(); - VectorizedRowBatch result = new VectorizedRowBatch(fieldRefs.size()); - for (int j = 0; j < fieldRefs.size(); j++) { - // If the column is included in the include list or if the column is a - // partition column then create the column vector. Also note that partition columns are not - // in the included list. - if ((colsToInclude == null) || colsToInclude.contains(j) - || ((partitionValues != null) && - partitionValues.containsKey(fieldRefs.get(j).getFieldName()))) { - ObjectInspector foi = fieldRefs.get(j).getFieldObjectInspector(); - switch (foi.getCategory()) { - case PRIMITIVE: { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi; - // Vectorization currently only supports the following data types: - // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, CHAR, VARCHAR, TIMESTAMP, - // DATE and DECIMAL - switch (poi.getPrimitiveCategory()) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case TIMESTAMP: - case DATE: - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: - result.cols[j] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - break; - case FLOAT: - case DOUBLE: - result.cols[j] = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - break; - case BINARY: - case STRING: - case CHAR: - case VARCHAR: - result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - break; - case DECIMAL: - DecimalTypeInfo tInfo = (DecimalTypeInfo) poi.getTypeInfo(); - result.cols[j] = new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, - tInfo.precision(), tInfo.scale()); - break; - default: - throw new RuntimeException("Vectorizaton is not supported for datatype:" - + poi.getPrimitiveCategory()); - } - break; - } - case LIST: - case MAP: - case STRUCT: - case UNION: - throw new HiveException("Vectorizaton is not supported for datatype:" - + foi.getCategory()); - default: - throw new HiveException("Unknown ObjectInspector category!"); - } - } + int totalColumnCount = rowColumnTypeInfos.length + scratchColumnTypeNames.length; + VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount); + + LOG.info("createVectorizedRowBatch columnsToIncludeTruncated NONE"); + for (int i = 0; i < rowColumnTypeInfos.length; i++) { + TypeInfo typeInfo = rowColumnTypeInfos[i]; + result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo); + } + + for (int i = 0; i < scratchColumnTypeNames.length; i++) { + String typeName = scratchColumnTypeNames[i]; +
result.cols[rowColumnTypeInfos.length + i] = + VectorizedBatchUtil.createColumnVector(typeName); } - result.numCols = fieldRefs.size(); - this.addScratchColumnsToBatch(result); + + result.setPartitionInfo(dataColumnCount, partitionColumnCount); + result.reset(); return result; } - /** - * Adds the row to the batch after deserializing the row - * - * @param rowIndex - * Row index in the batch to which the row is added - * @param rowBlob - * Row blob (serialized version of row) - * @param batch - * Vectorized batch to which the row is added - * @param buffer a buffer to copy strings into - * @throws HiveException - * @throws SerDeException - */ - public void addRowToBatch(int rowIndex, Writable rowBlob, - VectorizedRowBatch batch, - DataOutputBuffer buffer - ) throws HiveException, SerDeException + public VectorizedRowBatch createVectorizedRowBatch(boolean[] columnsToIncludeTruncated) { - Object row = this.deserializer.deserialize(rowBlob); - VectorizedBatchUtil.addRowToBatch(row, this.rawRowOI, rowIndex, batch, buffer); - } + if (columnsToIncludeTruncated == null) { + return createVectorizedRowBatch(); + } - /** - * Deserialized set of rows and populates the batch - * - * @param rowBlob - * to deserialize - * @param batch - * Vectorized row batch which contains deserialized data - * @throws SerDeException - */ - public void convertRowBatchBlobToVectorizedBatch(Object rowBlob, int rowsInBlob, - VectorizedRowBatch batch) - throws SerDeException { - - if (deserializer instanceof VectorizedSerde) { - ((VectorizedSerde) deserializer).deserializeVector(rowBlob, rowsInBlob, batch); - } else { - throw new SerDeException( - "Not able to deserialize row batch. Serde does not implement VectorizedSerde"); + LOG.info("createVectorizedRowBatch columnsToIncludeTruncated " + Arrays.toString(columnsToIncludeTruncated)); + int totalColumnCount = rowColumnTypeInfos.length + scratchColumnTypeNames.length; + VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount); + + for (int i = 0; i < columnsToIncludeTruncated.length; i++) { + if (columnsToIncludeTruncated[i]) { + TypeInfo typeInfo = rowColumnTypeInfos[i]; + result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo); + } } + + for (int i = dataColumnCount; i < dataColumnCount + partitionColumnCount; i++) { + TypeInfo typeInfo = rowColumnTypeInfos[i]; + result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo); + } + + for (int i = 0; i < scratchColumnTypeNames.length; i++) { + String typeName = scratchColumnTypeNames[i]; + result.cols[rowColumnTypeInfos.length + i] = + VectorizedBatchUtil.createColumnVector(typeName); + } + + result.setPartitionInfo(dataColumnCount, partitionColumnCount); + + result.reset(); + return result; } - private int getColIndexBasedOnColName(String colName) throws HiveException - { - List fieldRefs = rowOI.getAllStructFieldRefs(); - for (int i = 0; i < fieldRefs.size(); i++) { - if (fieldRefs.get(i).getFieldName().equals(colName)) { - return i; + public boolean[] getColumnsToIncludeTruncated(Configuration conf) { + boolean[] columnsToIncludeTruncated = null; + + List columnsToIncludeTruncatedList = ColumnProjectionUtils.getReadColumnIDs(conf); + if (columnsToIncludeTruncatedList != null && columnsToIncludeTruncatedList.size() > 0 ) { + + // Partitioned columns will not be in the include list. 
+ + boolean[] columnsToInclude = new boolean[dataColumnCount]; + Arrays.fill(columnsToInclude, false); + for (int columnNum : columnsToIncludeTruncatedList) { + if (columnNum < dataColumnCount) { + columnsToInclude[columnNum] = true; + } + } + + // Work backwards to find the highest wanted column. + + int highestWantedColumnNum = -1; + for (int i = dataColumnCount - 1; i >= 0; i--) { + if (columnsToInclude[i]) { + highestWantedColumnNum = i; + break; + } + } + if (highestWantedColumnNum == -1) { + throw new RuntimeException("No columns to include?"); + } + int newColumnCount = highestWantedColumnNum + 1; + if (newColumnCount == dataColumnCount) { + // Didn't trim any columns off the end. Use the original. + columnsToIncludeTruncated = columnsToInclude; + } else { + columnsToIncludeTruncated = Arrays.copyOf(columnsToInclude, newColumnCount); } } - throw new HiveException("Not able to find column name in row object inspector"); + return columnsToIncludeTruncated; } - + /** * Add the partition values to the batch * * @param batch + * @param partitionValues * @throws HiveException */ - public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveException + public void addPartitionColsToBatch(VectorizedRowBatch batch, Object[] partitionValues) { - int colIndex; - Object value; - PrimitiveCategory pCategory; if (partitionValues != null) { - for (String key : partitionValues.keySet()) { - colIndex = getColIndexBasedOnColName(key); - value = partitionValues.get(key); - pCategory = partitionTypes.get(key); - - switch (pCategory) { + for (int i = 0; i < partitionColumnCount; i++) { + Object value = partitionValues[i]; + + int colIndex = dataColumnCount + i; + String partitionColumnName = rowColumnNames[colIndex]; + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) rowColumnTypeInfos[colIndex]; + switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: { LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex]; if (value == null) { @@ -575,7 +447,7 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti HiveDecimal hd = (HiveDecimal) value; dv.set(0, hd); dv.isRepeating = true; - dv.isNull[0] = false; + dv.isNull[0] = false; } } break; @@ -604,15 +476,15 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti bcv.isNull[0] = true; bcv.isRepeating = true; } else { - bcv.fill(sVal.getBytes()); + bcv.fill(sVal.getBytes()); bcv.isNull[0] = false; } } break; - + default: - throw new HiveException("Unable to recognize the partition type " + pCategory + - " for column " + key); + throw new RuntimeException("Unable to recognize the partition type " + primitiveTypeInfo.getPrimitiveCategory() + + " for column " + partitionColumnName); } } } @@ -620,64 +492,12 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti /** * Determine whether a given column is a partition column - * @param colnum column number in + * @param colNum column number in * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}s created by this context. * @return true if it is a partition column, false otherwise */ - public final boolean isPartitionCol(int colnum) { - return (partitionCols == null) ? 
false : partitionCols.contains(colnum); - } - - private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveException { - if (scratchColumnTypeMap != null && !scratchColumnTypeMap.isEmpty()) { - int origNumCols = vrb.numCols; - int newNumCols = vrb.cols.length+scratchColumnTypeMap.keySet().size(); - vrb.cols = Arrays.copyOf(vrb.cols, newNumCols); - for (int i = origNumCols; i < newNumCols; i++) { - String typeName = scratchColumnTypeMap.get(i); - if (typeName == null) { - throw new HiveException("No type entry found for column " + i + " in map " + scratchColumnTypeMap.toString()); - } - vrb.cols[i] = allocateColumnVector(typeName, - VectorizedRowBatch.DEFAULT_SIZE); - } - vrb.numCols = vrb.cols.length; - } - } - - /** - * Get the scale and precision for the given decimal type string. The decimal type is assumed to be - * of the format decimal(precision,scale) e.g. decimal(20,10). - * @param decimalType The given decimal type string. - * @return An integer array of size 2 with first element set to precision and second set to scale. - */ - private static int[] getScalePrecisionFromDecimalType(String decimalType) { - Pattern p = Pattern.compile("\\d+"); - Matcher m = p.matcher(decimalType); - m.find(); - int precision = Integer.parseInt(m.group()); - m.find(); - int scale = Integer.parseInt(m.group()); - int [] precScale = { precision, scale }; - return precScale; + public final boolean isPartitionCol(int colNum) { + return colNum >= dataColumnCount && colNum < rowColumnTypeInfos.length; } - public static ColumnVector allocateColumnVector(String type, int defaultSize) { - if (type.equalsIgnoreCase("double")) { - return new DoubleColumnVector(defaultSize); - } else if (VectorizationContext.isStringFamily(type)) { - return new BytesColumnVector(defaultSize); - } else if (VectorizationContext.decimalTypePattern.matcher(type).matches()){ - int [] precisionScale = getScalePrecisionFromDecimalType(type); - return new DecimalColumnVector(defaultSize, precisionScale[0], precisionScale[1]); - } else if (type.equalsIgnoreCase("long") || - type.equalsIgnoreCase("date") || - type.equalsIgnoreCase("timestamp") || - type.equalsIgnoreCase(serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME) || - type.equalsIgnoreCase(serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME)) { - return new LongColumnVector(defaultSize); - } else { - throw new RuntimeException("Cannot allocate vector column for " + type); - } - } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index 35e3403..f28d3ab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -25,7 +25,6 @@ import java.util.Map; import java.util.concurrent.Future; -import org.apache.commons.lang3.tuple.Pair; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -34,12 +33,7 @@ import org.apache.hadoop.hive.ql.HashTableLoaderFactory; import org.apache.hadoop.hive.ql.exec.HashTableLoader; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; -import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; @@ -58,7 +52,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader; -import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -70,6 +63,8 @@ import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * This class is common operator class for native vectorized map join. @@ -572,10 +567,11 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { * Create our vectorized copy row and deserialize row helper objects. */ if (smallTableMapping.getCount() > 0) { - smallTableVectorDeserializeRow = new VectorDeserializeRow( - new LazyBinaryDeserializeRead( - VectorizedBatchUtil.primitiveTypeInfosFromTypeNames( - smallTableMapping.getTypeNames()))); + smallTableVectorDeserializeRow = + new VectorDeserializeRow( + new LazyBinaryDeserializeRead( + VectorizedBatchUtil.typeInfosFromTypeNames( + smallTableMapping.getTypeNames()))); smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns()); } @@ -649,23 +645,13 @@ protected void completeInitializationOp(Object[] os) throws HiveException { * Setup our 2nd batch with the same "column schema" as the big table batch that can be used to * build join output results in. */ - protected VectorizedRowBatch setupOverflowBatch() { + protected VectorizedRowBatch setupOverflowBatch() throws HiveException { + + int initialColumnCount = vContext.firstOutputColumnIndex(); VectorizedRowBatch overflowBatch; - Map scratchColumnTypeMap = vOutContext.getScratchColumnTypeMap(); - int maxColumn = 0; - for (int i = 0; i < outputProjection.length; i++) { - int outputColumn = outputProjection[i]; - if (maxColumn < outputColumn) { - maxColumn = outputColumn; - } - } - for (int outputColumn : scratchColumnTypeMap.keySet()) { - if (maxColumn < outputColumn) { - maxColumn = outputColumn; - } - } - overflowBatch = new VectorizedRowBatch(maxColumn + 1); + int totalNumColumns = initialColumnCount + vOutContext.getScratchColumnTypeNames().length; + overflowBatch = new VectorizedRowBatch(totalNumColumns); // First, just allocate just the projection columns we will be using. for (int i = 0; i < outputProjection.length; i++) { @@ -675,9 +661,9 @@ protected VectorizedRowBatch setupOverflowBatch() { } // Now, add any scratch columns needed for children operators. 
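The rewritten setupOverflowBatch sizing deserves a gloss: output columns occupy the low indices and scratch columns are appended contiguously after them, so the total is a plain sum instead of the old scan for a maximum map key. An illustrative fragment with made-up values standing in for the actual Hive fields:

```java
public class OverflowBatchLayout {
  public static void main(String[] args) {
    // Stand-ins for vContext.firstOutputColumnIndex() and
    // vOutContext.getScratchColumnTypeNames() -- the values are invented.
    int initialColumnCount = 4;
    String[] scratchColumnTypeNames = {"double", "string"};

    int totalNumColumns = initialColumnCount + scratchColumnTypeNames.length;
    System.out.println("overflow batch size: " + totalNumColumns); // 6

    // Scratch columns land at consecutive indices after the output columns.
    int outputColumn = initialColumnCount;
    for (String typeName : scratchColumnTypeNames) {
      System.out.println("scratch col " + (outputColumn++) + " -> " + typeName);
    }
  }
}
```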
- for (int outputColumn : scratchColumnTypeMap.keySet()) { - String typeName = scratchColumnTypeMap.get(outputColumn); - allocateOverflowBatchColumnVector(overflowBatch, outputColumn, typeName); + int outputColumn = initialColumnCount; + for (String typeName : vOutContext.getScratchColumnTypeNames()) { + allocateOverflowBatchColumnVector(overflowBatch, outputColumn++, typeName); } overflowBatch.projectedColumns = outputProjection; @@ -695,22 +681,13 @@ private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, String typeName) { if (overflowBatch.cols[outputColumn] == null) { - String vectorTypeName; - if (VectorizationContext.isIntFamily(typeName) || - VectorizationContext.isDatetimeFamily(typeName)) { - vectorTypeName = "long"; - } else if (VectorizationContext.isFloatFamily(typeName)) { - vectorTypeName = "double"; - } else if (VectorizationContext.isStringFamily(typeName)) { - vectorTypeName = "string"; - } else if (VectorizationContext.decimalTypePattern.matcher(typeName).matches()){ - vectorTypeName = typeName; // Keep precision and scale. - } else { - throw new RuntimeException("Cannot determine vector type for " + typeName); - } - overflowBatch.cols[outputColumn] = VectorizedRowBatchCtx.allocateColumnVector(vectorTypeName, VectorizedRowBatch.DEFAULT_SIZE); + typeName = VectorizationContext.mapTypeNameSynonyms(typeName); - if (LOG.isDebugEnabled()) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName); + + overflowBatch.cols[outputColumn] = VectorizedBatchUtil.createColumnVector(typeInfo); + + if (isLogDebugEnabled) { LOG.debug(taskName + ", " + getOperatorId() + " VectorMapJoinCommonOperator initializeOp overflowBatch outputColumn " + outputColumn + " class " + overflowBatch.cols[outputColumn].getClass().getSimpleName()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 0adbea1..e1c2f31 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -47,6 +47,8 @@ import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.ByteStream.Output; /** @@ -73,7 +75,9 @@ private static final Log LOG = LogFactory.getLog(VectorMapJoinGenerateResultOperator.class.getName()); private static final String CLASS_NAME = VectorMapJoinGenerateResultOperator.class.getName(); - private transient PrimitiveTypeInfo[] bigTablePrimitiveTypeInfos; + //------------------------------------------------------------------------------------------------ + + private transient TypeInfo[] bigTableTypeInfos; private transient VectorSerializeRow bigTableVectorSerializeRow; @@ -394,14 +398,14 @@ protected void generateHashMapResultRepeatedAll(VectorizedRowBatch batch, private void setupSpillSerDe(VectorizedRowBatch batch) throws HiveException { - PrimitiveTypeInfo[] inputObjInspectorsTypeInfos = - VectorizedBatchUtil.primitiveTypeInfosFromStructObjectInspector( + TypeInfo[] inputObjInspectorsTypeInfos = + 
VectorizedBatchUtil.typeInfosFromStructObjectInspector( (StructObjectInspector) inputObjInspectors[posBigTable]); List projectedColumns = vContext.getProjectedColumns(); int projectionSize = vContext.getProjectedColumns().size(); - List typeInfoList = new ArrayList(); + List typeInfoList = new ArrayList(); List noNullsProjectionList = new ArrayList(); for (int i = 0; i < projectionSize; i++) { int projectedColumn = projectedColumns.get(i); @@ -413,17 +417,19 @@ private void setupSpillSerDe(VectorizedRowBatch batch) throws HiveException { int[] noNullsProjection = ArrayUtils.toPrimitive(noNullsProjectionList.toArray(new Integer[0])); int noNullsProjectionSize = noNullsProjection.length; - bigTablePrimitiveTypeInfos = typeInfoList.toArray(new PrimitiveTypeInfo[0]); + bigTableTypeInfos = typeInfoList.toArray(new TypeInfo[0]); bigTableVectorSerializeRow = - new VectorSerializeRow(new LazyBinarySerializeWrite(noNullsProjectionSize)); + new VectorSerializeRow( + new LazyBinarySerializeWrite(noNullsProjectionSize)); bigTableVectorSerializeRow.init( - bigTablePrimitiveTypeInfos, - noNullsProjectionList); + bigTableTypeInfos, + noNullsProjection); - bigTableVectorDeserializeRow = new VectorDeserializeRow( - new LazyBinaryDeserializeRead(bigTablePrimitiveTypeInfos)); + bigTableVectorDeserializeRow = + new VectorDeserializeRow( + new LazyBinaryDeserializeRead(bigTableTypeInfos)); bigTableVectorDeserializeRow.init(noNullsProjection); } @@ -833,4 +839,4 @@ public static String longArrayToRangesString(long selection[], int size) { sb.append("]"); return sb.toString(); } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java index 8f60e9d..6d4c198 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.hive.shims.ShimLoader; @@ -33,6 +34,8 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.Properties; import java.util.regex.Pattern; /** @@ -572,4 +575,20 @@ private static void findOriginals(FileSystem fs, FileStatus stat, original.add(stat); } } + + public static boolean isTablePropertyTransactional(Properties props) { + String resultStr = props.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); + if (resultStr == null) { + resultStr = props.getProperty(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.toUpperCase()); + } + return resultStr != null && resultStr.equalsIgnoreCase("true"); + } + + public static boolean isTablePropertyTransactional(Map parameters) { + String resultStr = parameters.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); + if (resultStr == null) { + resultStr = parameters.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL.toUpperCase()); + } + return resultStr != null && resultStr.equalsIgnoreCase("true"); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 2d6e752..181ce84 100755 --- ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -490,11 +490,16 @@ public static void 
pushFilters(JobConf jobConf, TableScanOperator tableScan) { // ensure filters are not set from previous pushFilters jobConf.unset(TableScanDesc.FILTER_TEXT_CONF_STR); jobConf.unset(TableScanDesc.FILTER_EXPR_CONF_STR); + + Utilities.unsetSchemaEvolution(jobConf); + TableScanDesc scanDesc = tableScan.getConf(); if (scanDesc == null) { return; } + Utilities.addTableSchemaToConf(jobConf, tableScan); + // construct column name list and types for reference by filter push down Utilities.setColumnNameList(jobConf, tableScan); Utilities.setColumnTypeList(jobConf, tableScan); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java index 9879dfe..8d94da8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java @@ -36,6 +36,17 @@ public static final String AVRO = "AVRO"; public static final String AVROFILE = "AVROFILE"; + /** + * The desired TABLE column names and types for input format schema evolution. + * This is different than COLUMNS and COLUMNS_TYPES, which are based on individual partition + * metadata. + * + * Virtual columns and partition columns are not included + * + */ + public static final String SCHEMA_EVOLUTION_COLUMNS = "schema.evolution.columns"; + public static final String SCHEMA_EVOLUTION_COLUMNS_TYPES = "schema.evolution.columns.types"; + @VisibleForTesting public static final String CUSTOM_TEXT_SERDE = "CustomTextSerde"; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/SelfDescribingInputFormatInterface.java ql/src/java/org/apache/hadoop/hive/ql/io/SelfDescribingInputFormatInterface.java new file mode 100644 index 0000000..6c455bd --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/SelfDescribingInputFormatInterface.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io; + +/** + * Marker interface to indicate a given input format is self-describing and + * can perform schema evolution itself. + */ +public interface SelfDescribingInputFormatInterface { + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java deleted file mode 100644 index e9e1d5a..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileInputFormat.java +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.io; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; - -/** - * A MapReduce/Hive Vectorized input format for RC files. - */ -public class VectorizedRCFileInputFormat extends FileInputFormat - implements InputFormatChecker { - - public VectorizedRCFileInputFormat() { - setMinSplitSize(SequenceFile.SYNC_INTERVAL); - } - - @Override - @SuppressWarnings("unchecked") - public RecordReader getRecordReader(InputSplit split, JobConf job, - Reporter reporter) throws IOException { - - reporter.setStatus(split.toString()); - - return new VectorizedRCFileRecordReader(job, (FileSplit) split); - } - - @Override - public boolean validateInput(FileSystem fs, HiveConf conf, - List files) throws IOException { - if (files.size() <= 0) { - return false; - } - for (int fileId = 0; fileId < files.size(); fileId++) { - RCFile.Reader reader = null; - try { - reader = new RCFile.Reader(fs, files.get(fileId) - .getPath(), conf); - reader.close(); - reader = null; - } catch (IOException e) { - return false; - } finally { - if (null != reader) { - reader.close(); - } - } - } - return true; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileRecordReader.java ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileRecordReader.java deleted file mode 100644 index 4cc1c2f..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/VectorizedRCFileRecordReader.java +++ /dev/null @@ -1,261 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.io; - -import java.io.IOException; -import java.util.Collections; -import java.util.Map; -import java.util.WeakHashMap; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.io.RCFile.KeyBuffer; -import org.apache.hadoop.hive.ql.io.RCFile.Reader; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.RecordReader; - -/** - * RCFileRecordReader. - */ -public class VectorizedRCFileRecordReader implements RecordReader { - - private final Reader in; - private final long start; - private final long end; - private boolean more = true; - protected Configuration conf; - private final FileSplit split; - private final boolean useCache; - private VectorizedRowBatchCtx rbCtx; - private final LongWritable keyCache = new LongWritable(); - private final BytesRefArrayWritable colsCache = new BytesRefArrayWritable(); - private boolean addPartitionCols = true; - private final DataOutputBuffer buffer = new DataOutputBuffer(); - - private static RCFileSyncCache syncCache = new RCFileSyncCache(); - - private static final class RCFileSyncEntry { - long end; - long endSync; - } - - private static final class RCFileSyncCache { - - private final Map cache; - - public RCFileSyncCache() { - cache = Collections.synchronizedMap(new WeakHashMap()); - } - - public void put(FileSplit split, long endSync) { - Path path = split.getPath(); - long end = split.getStart() + split.getLength(); - String key = path.toString() + "+" + String.format("%d", end); - - RCFileSyncEntry entry = new RCFileSyncEntry(); - entry.end = end; - entry.endSync = endSync; - if (entry.endSync >= entry.end) { - cache.put(key, entry); - } - } - - public long get(FileSplit split) { - Path path = split.getPath(); - long start = split.getStart(); - String key = path.toString() + "+" + String.format("%d", start); - RCFileSyncEntry entry = cache.get(key); - if (entry != null) { - return entry.endSync; - } - return -1; - } - } - - public VectorizedRCFileRecordReader(Configuration conf, FileSplit split) - throws IOException { - - Path path = split.getPath(); - FileSystem fs = path.getFileSystem(conf); - this.in = new RCFile.Reader(fs, path, conf); - this.end = split.getStart() + split.getLength(); - this.conf = conf; - this.split = split; - - useCache = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEUSERCFILESYNCCACHE); - - if (split.getStart() > in.getPosition()) { - long oldSync = useCache ? 
syncCache.get(split) : -1; - if (oldSync == -1) { - in.sync(split.getStart()); // sync to start - } else { - in.seek(oldSync); - } - } - - this.start = in.getPosition(); - - more = start < end; - try { - rbCtx = new VectorizedRowBatchCtx(); - rbCtx.init(conf, split); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - public Class getKeyClass() { - return LongWritable.class; - } - - public Class getValueClass() { - return BytesRefArrayWritable.class; - } - - @Override - public NullWritable createKey() { - return NullWritable.get(); - } - - @Override - public VectorizedRowBatch createValue() { - VectorizedRowBatch result; - try { - result = rbCtx.createVectorizedRowBatch(); - } catch (HiveException e) { - throw new RuntimeException("Error creating a batch", e); - } - return result; - } - - public boolean nextBlock() throws IOException { - return in.nextBlock(); - } - - @Override - public boolean next(NullWritable key, VectorizedRowBatch value) throws IOException { - - // Reset column fields noNull values to true - VectorizedBatchUtil.setNoNullFields(value); - buffer.reset(); - value.selectedInUse = false; - for (int i = 0; i < value.numCols; i++) { - value.cols[i].isRepeating = false; - } - - int i = 0; - try { - - for (; i < VectorizedRowBatch.DEFAULT_SIZE; i++) { - more = next(keyCache); - if (more) { - // Check and update partition cols if necessary. Ideally this should be done - // in CreateValue() as the partition is constant per split. But since Hive uses - // CombineHiveRecordReader and as this does not call CreateValue() for - // each new RecordReader it creates, this check is required in next() - if (addPartitionCols) { - rbCtx.addPartitionColsToBatch(value); - addPartitionCols = false; - } - in.getCurrentRow(colsCache); - // Currently RCFile reader does not support reading vectorized - // data. Populating the batch by adding one row at a time. - rbCtx.addRowToBatch(i, (Writable) colsCache, value, buffer); - } else { - break; - } - } - } catch (Exception e) { - throw new RuntimeException("Error while getting next row", e); - } - value.size = i; - return more; - } - - protected boolean next(LongWritable key) throws IOException { - if (!more) { - return false; - } - - more = in.next(key); - - long lastSeenSyncPos = in.lastSeenSyncPos(); - - if (lastSeenSyncPos >= end) { - if (useCache) { - syncCache.put(split, lastSeenSyncPos); - } - more = false; - return more; - } - return more; - } - - /** - * Return the progress within the input split. 
- * - * @return 0.0 to 1.0 of the input byte range - */ - public float getProgress() throws IOException { - if (end == start) { - return 0.0f; - } else { - return Math.min(1.0f, (in.getPosition() - start) / (float) (end - start)); - } - } - - public long getPos() throws IOException { - return in.getPosition(); - } - - public KeyBuffer getKeyBuffer() { - return in.getCurrentKeyBufferObj(); - } - - protected void seek(long pos) throws IOException { - in.seek(pos); - } - - public void sync(long pos) throws IOException { - in.sync(pos); - } - - public void resetBuffer() { - in.resetBuffer(); - } - - public long getStart() { - return start; - } - - public void close() throws IOException { - in.close(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java deleted file mode 100644 index aaf4eb4..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ConversionTreeReaderFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import java.io.IOException; -import java.util.List; - -/** - * Factory for creating ORC tree readers. These tree readers can handle type promotions and type - * conversions. - */ -public class ConversionTreeReaderFactory extends TreeReaderFactory { - - // TODO: This is currently only a place holder for type conversions. 
- - public static TreeReader createTreeReader(int columnId, - List types, - boolean[] included, - boolean skipCorrupt - ) throws IOException { - return TreeReaderFactory.createTreeReader(columnId, types, included, skipCorrupt); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index e3e6893..dade971 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; import org.apache.hadoop.hive.ql.io.AcidInputFormat; @@ -53,7 +54,9 @@ import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.RecordIdentifier; +import org.apache.hadoop.hive.ql.io.SelfDescribingInputFormatInterface; import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader; +import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; @@ -103,7 +106,8 @@ */ public class OrcInputFormat implements InputFormat, InputFormatChecker, VectorizedInputFormatInterface, - AcidInputFormat, CombineHiveInputFormat.AvoidSplitCombination { + SelfDescribingInputFormatInterface, AcidInputFormat, + CombineHiveInputFormat.AvoidSplitCombination { static enum SplitStrategyKind{ HYBRID, @@ -222,7 +226,14 @@ public static RecordReader createReaderFromFile(Reader file, Configuration conf, long offset, long length ) throws IOException { + + /** + * Do we have schema on read in the configuration variables? + */ + TypeDescription schema = OrcUtils.getDesiredRowTypeDescr(conf, /* isAcid */ false); + Reader.Options options = new Reader.Options().range(offset, length); + options.schema(schema); boolean isOriginal = isOriginal(file); List types = file.getTypes(); options.include(genIncludedColumns(types, conf, isOriginal)); @@ -1167,7 +1178,8 @@ private static void cancelFutures(List> futures) { if (vectorMode) { return (org.apache.hadoop.mapred.RecordReader) - new VectorizedOrcAcidRowReader(inner, conf, (FileSplit) inputSplit); + new VectorizedOrcAcidRowReader(inner, conf, + Utilities.getMapWork(conf).getVectorizedRowBatchCtx(), (FileSplit) inputSplit); } return new NullKeyRecordReader(inner, conf); } @@ -1218,10 +1230,14 @@ public float getProgress() throws IOException { } } + // The schema type description does not include the ACID fields (i.e. it is the + // non-ACID original schema). + private static boolean SCHEMA_TYPES_IS_ORIGINAL = true; @Override public RowReader getReader(InputSplit inputSplit, - Options options) throws IOException { + Options options) + throws IOException { final OrcSplit split = (OrcSplit) inputSplit; final Path path = split.getPath(); Path root; @@ -1236,36 +1252,33 @@ public float getProgress() throws IOException { } final Path[] deltas = AcidUtils.deserializeDeltas(root, split.getDeltas()); final Configuration conf = options.getConfiguration(); + + + /** + * Do we have schema on read in the configuration variables? 
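The comment's question is answered by OrcUtils.getDesiredRowTypeDescr, added later in this same patch. As a hedged sketch of just its property handling, with java.util.Properties standing in for the Hadoop Configuration and a naive type split that the real type-string parser would replace:

```java
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class SchemaOnReadLookup {
  // Mirrors the guard in OrcUtils.getDesiredRowTypeDescr: the evolved schema
  // is trusted only when both properties exist and the counts line up.
  static List<String> desiredColumnNames(Properties conf) {
    String names = conf.getProperty("schema.evolution.columns");
    String types = conf.getProperty("schema.evolution.columns.types");
    if (names == null || types == null) {
      return null; // caller falls back to the regular columns/columns.types
    }
    List<String> nameList = Arrays.asList(names.split(","));
    // A flat ':' split is enough for this illustration; nested types need
    // a proper parser (as typeDescriptionsFromHiveTypeProperty uses).
    List<String> typeList = Arrays.asList(types.split(":"));
    return nameList.size() == typeList.size() ? nameList : null;
  }

  public static void main(String[] args) {
    Properties conf = new Properties();
    conf.setProperty("schema.evolution.columns", "id,name");
    conf.setProperty("schema.evolution.columns.types", "int:string");
    System.out.println(desiredColumnNames(conf)); // [id, name]
  }
}
```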
+ */ + TypeDescription schema = OrcUtils.getDesiredRowTypeDescr(conf, /* isAcid */ true); + if (schema == null) { + throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg()); + } + final Reader reader; final int bucket; - Reader.Options readOptions = new Reader.Options(); + Reader.Options readOptions = new Reader.Options().schema(schema); readOptions.range(split.getStart(), split.getLength()); + + // TODO: Convert genIncludedColumns and setSearchArgument to use TypeDescription. + final List schemaTypes = OrcUtils.getOrcTypes(schema); + readOptions.include(genIncludedColumns(schemaTypes, conf, SCHEMA_TYPES_IS_ORIGINAL)); + setSearchArgument(readOptions, schemaTypes, conf, SCHEMA_TYPES_IS_ORIGINAL); + if (split.hasBase()) { bucket = AcidUtils.parseBaseBucketFilename(split.getPath(), conf) .getBucket(); reader = OrcFile.createReader(path, OrcFile.readerOptions(conf)); - final List types = reader.getTypes(); - readOptions.include(genIncludedColumns(types, conf, split.isOriginal())); - setSearchArgument(readOptions, types, conf, split.isOriginal()); } else { bucket = (int) split.getStart(); reader = null; - if(deltas != null && deltas.length > 0) { - Path bucketPath = AcidUtils.createBucketFile(deltas[0], bucket); - OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(conf); - FileSystem fs = readerOptions.getFilesystem(); - if(fs == null) { - fs = path.getFileSystem(options.getConfiguration()); - } - if(fs.exists(bucketPath)) { - /* w/o schema evolution (which ACID doesn't support yet) all delta - files have the same schema, so choosing the 1st one*/ - final List types = - OrcFile.createReader(bucketPath, readerOptions).getTypes(); - readOptions.include(genIncludedColumns(types, conf, split.isOriginal())); - setSearchArgument(readOptions, types, conf, split.isOriginal()); - } - } } String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY, Long.MAX_VALUE + ":"); @@ -1278,9 +1291,7 @@ public float getProgress() throws IOException { @Override public ObjectInspector getObjectInspector() { - return ((StructObjectInspector) records.getObjectInspector()) - .getAllStructFieldRefs().get(OrcRecordUpdater.ROW) - .getFieldObjectInspector(); + return OrcStruct.createObjectInspector(0, schemaTypes); } @Override @@ -1367,5 +1378,4 @@ static Path findOriginalBucket(FileSystem fs, bucket, validTxnList, new Reader.Options(), deltaDirectory); } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java index ab0c364..bad2a4c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java @@ -18,30 +18,25 @@ package org.apache.hadoop.hive.ql.io.orc; import com.google.common.annotations.VisibleForTesting; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.ValidTxnList; +import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.RecordIdentifier; -import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import 
org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import java.io.IOException; -import java.util.ArrayDeque; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Deque; import java.util.List; import java.util.Map; import java.util.TreeMap; @@ -57,6 +52,7 @@ private final Configuration conf; private final boolean collapse; private final RecordReader baseReader; + private final ObjectInspector objectInspector; private final long offset; private final long length; private final ValidTxnList validTxnList; @@ -443,6 +439,15 @@ private void discoverKeyBounds(Reader reader, this.offset = options.getOffset(); this.length = options.getLength(); this.validTxnList = validTxnList; + + TypeDescription typeDescr = OrcUtils.getDesiredRowTypeDescr(conf, /* isAcid */ true); + if (typeDescr == null) { + throw new IOException(ErrorMsg.SCHEMA_REQUIRED_TO_READ_ACID_TABLES.getErrorCodedMsg()); + } + + objectInspector = OrcRecordUpdater.createEventSchema + (OrcStruct.createObjectInspector(0, OrcUtils.getOrcTypes(typeDescr))); + // modify the options to reflect the event instead of the base row Reader.Options eventOptions = createEventOptions(options); if (reader == null) { @@ -672,46 +677,7 @@ public float getProgress() throws IOException { @Override public ObjectInspector getObjectInspector() { - // Read the configuration parameters - String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS); - // NOTE: if "columns.types" is missing, all columns will be of String type - String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES); - - // Parse the configuration parameters - ArrayList columnNames = new ArrayList(); - Deque virtualColumns = new ArrayDeque(); - if (columnNameProperty != null && columnNameProperty.length() > 0) { - String[] colNames = columnNameProperty.split(","); - for (int i = 0; i < colNames.length; i++) { - if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(colNames[i])) { - virtualColumns.addLast(i); - } else { - columnNames.add(colNames[i]); - } - } - } - if (columnTypeProperty == null) { - // Default type: all string - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < columnNames.size(); i++) { - if (i > 0) { - sb.append(":"); - } - sb.append("string"); - } - columnTypeProperty = sb.toString(); - } - - ArrayList fieldTypes = - TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - while (virtualColumns.size() > 0) { - fieldTypes.remove(virtualColumns.removeLast()); - } - StructTypeInfo rowType = new StructTypeInfo(); - rowType.setAllStructFieldNames(columnNames); - rowType.setAllStructFieldTypeInfos(fieldTypes); - return OrcRecordUpdater.createEventSchema - (OrcStruct.createObjectInspector(rowType)); + return objectInspector; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java index db2ca15..ad4a9e8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcUtils.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.io.orc; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -30,6 +31,21 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import com.google.common.collect.Lists; @@ -204,4 +220,550 @@ public static int getFlattenedColumnsCount(ObjectInspector inspector) { return numWriters; } + /** + * Convert a Hive type property string that contains separated type names into a list of + * TypeDescription objects. + * @return the list of TypeDescription objects. + */ + public static ArrayList typeDescriptionsFromHiveTypeProperty( + String hiveTypeProperty) { + + // CONSIDER: We need a type name parser for TypeDescription. + + ArrayList typeInfoList = TypeInfoUtils.getTypeInfosFromTypeString(hiveTypeProperty); + ArrayList typeDescrList = new ArrayList(typeInfoList.size()); + for (TypeInfo typeInfo : typeInfoList) { + typeDescrList.add(convertTypeInfo(typeInfo)); + } + return typeDescrList; + } + + public static TypeDescription convertTypeInfo(TypeInfo info) { + switch (info.getCategory()) { + case PRIMITIVE: { + PrimitiveTypeInfo pinfo = (PrimitiveTypeInfo) info; + switch (pinfo.getPrimitiveCategory()) { + case BOOLEAN: + return TypeDescription.createBoolean(); + case BYTE: + return TypeDescription.createByte(); + case SHORT: + return TypeDescription.createShort(); + case INT: + return TypeDescription.createInt(); + case LONG: + return TypeDescription.createLong(); + case FLOAT: + return TypeDescription.createFloat(); + case DOUBLE: + return TypeDescription.createDouble(); + case STRING: + return TypeDescription.createString(); + case DATE: + return TypeDescription.createDate(); + case TIMESTAMP: + return TypeDescription.createTimestamp(); + case BINARY: + return TypeDescription.createBinary(); + case DECIMAL: { + DecimalTypeInfo dinfo = (DecimalTypeInfo) pinfo; + return TypeDescription.createDecimal() + .withScale(dinfo.getScale()) + .withPrecision(dinfo.getPrecision()); + } + case VARCHAR: { + BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo; + return TypeDescription.createVarchar() + .withMaxLength(cinfo.getLength()); + } + case CHAR: { + BaseCharTypeInfo cinfo = (BaseCharTypeInfo) pinfo; + return TypeDescription.createChar() + .withMaxLength(cinfo.getLength()); + } + default: + throw new IllegalArgumentException("ORC doesn't handle primitive" + + " category " + pinfo.getPrimitiveCategory()); + } + } + case LIST: { + ListTypeInfo linfo = (ListTypeInfo) info; + return TypeDescription.createList + (convertTypeInfo(linfo.getListElementTypeInfo())); + } + case MAP: { + MapTypeInfo minfo = (MapTypeInfo) info; + return TypeDescription.createMap + (convertTypeInfo(minfo.getMapKeyTypeInfo()), + convertTypeInfo(minfo.getMapValueTypeInfo()));
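A usage sketch for the conversion above, assuming the TypeDescription in this package exposes the getPrecision()/getScale() accessors that appendOrcTypes below relies on:

```java
import org.apache.hadoop.hive.ql.io.orc.OrcUtils;
import org.apache.hadoop.hive.ql.io.orc.TypeDescription;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ConvertTypeInfoSketch {
  public static void main(String[] args) {
    // Parameterized details survive the conversion: decimal keeps its
    // precision/scale, and char/varchar similarly keep their max length.
    TypeInfo info = TypeInfoUtils.getTypeInfoFromTypeString("decimal(10,2)");
    TypeDescription descr = OrcUtils.convertTypeInfo(info);
    System.out.println(descr.getPrecision() + "," + descr.getScale()); // 10,2
  }
}
```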
+ } + case UNION: { + UnionTypeInfo minfo = (UnionTypeInfo) info; + TypeDescription result = TypeDescription.createUnion(); + for (TypeInfo child: minfo.getAllUnionObjectTypeInfos()) { + result.addUnionChild(convertTypeInfo(child)); + } + return result; + } + case STRUCT: { + StructTypeInfo sinfo = (StructTypeInfo) info; + TypeDescription result = TypeDescription.createStruct(); + for(String fieldName: sinfo.getAllStructFieldNames()) { + result.addField(fieldName, + convertTypeInfo(sinfo.getStructFieldTypeInfo(fieldName))); + } + return result; + } + default: + throw new IllegalArgumentException("ORC doesn't handle " + + info.getCategory()); + } + } + + public static List getOrcTypes(TypeDescription typeDescr) { + List result = Lists.newArrayList(); + appendOrcTypes(result, typeDescr); + return result; + } + + private static void appendOrcTypes(List result, TypeDescription typeDescr) { + OrcProto.Type.Builder type = OrcProto.Type.newBuilder(); + List children = typeDescr.getChildren(); + switch (typeDescr.getCategory()) { + case BOOLEAN: + type.setKind(OrcProto.Type.Kind.BOOLEAN); + break; + case BYTE: + type.setKind(OrcProto.Type.Kind.BYTE); + break; + case SHORT: + type.setKind(OrcProto.Type.Kind.SHORT); + break; + case INT: + type.setKind(OrcProto.Type.Kind.INT); + break; + case LONG: + type.setKind(OrcProto.Type.Kind.LONG); + break; + case FLOAT: + type.setKind(OrcProto.Type.Kind.FLOAT); + break; + case DOUBLE: + type.setKind(OrcProto.Type.Kind.DOUBLE); + break; + case STRING: + type.setKind(OrcProto.Type.Kind.STRING); + break; + case CHAR: + type.setKind(OrcProto.Type.Kind.CHAR); + type.setMaximumLength(typeDescr.getMaxLength()); + break; + case VARCHAR: + type.setKind(Type.Kind.VARCHAR); + type.setMaximumLength(typeDescr.getMaxLength()); + break; + case BINARY: + type.setKind(OrcProto.Type.Kind.BINARY); + break; + case TIMESTAMP: + type.setKind(OrcProto.Type.Kind.TIMESTAMP); + break; + case DATE: + type.setKind(OrcProto.Type.Kind.DATE); + break; + case DECIMAL: + type.setKind(OrcProto.Type.Kind.DECIMAL); + type.setPrecision(typeDescr.getPrecision()); + type.setScale(typeDescr.getScale()); + break; + case LIST: + type.setKind(OrcProto.Type.Kind.LIST); + type.addSubtypes(children.get(0).getId()); + break; + case MAP: + type.setKind(OrcProto.Type.Kind.MAP); + for(TypeDescription t: children) { + type.addSubtypes(t.getId()); + } + break; + case STRUCT: + type.setKind(OrcProto.Type.Kind.STRUCT); + for(TypeDescription t: children) { + type.addSubtypes(t.getId()); + } + for(String field: typeDescr.getFieldNames()) { + type.addFieldNames(field); + } + break; + case UNION: + type.setKind(OrcProto.Type.Kind.UNION); + for(TypeDescription t: children) { + type.addSubtypes(t.getId()); + } + break; + default: + throw new IllegalArgumentException("Unknown category: " + + typeDescr.getCategory()); + } + result.add(type.build()); + if (children != null) { + for(TypeDescription child: children) { + appendOrcTypes(result, child); + } + } + } + + /** + * NOTE: This method ignores the subtype numbers in the TypeDescription rebuilds the subtype + * numbers based on the length of the result list being appended. 
+ * + * @param result + * @param typeInfo + */ + public static void appendOrcTypesRebuildSubtypes(List result, + TypeDescription typeDescr) { + + int subtype = result.size(); + OrcProto.Type.Builder type = OrcProto.Type.newBuilder(); + boolean needsAdd = true; + List children = typeDescr.getChildren(); + switch (typeDescr.getCategory()) { + case BOOLEAN: + type.setKind(OrcProto.Type.Kind.BOOLEAN); + break; + case BYTE: + type.setKind(OrcProto.Type.Kind.BYTE); + break; + case SHORT: + type.setKind(OrcProto.Type.Kind.SHORT); + break; + case INT: + type.setKind(OrcProto.Type.Kind.INT); + break; + case LONG: + type.setKind(OrcProto.Type.Kind.LONG); + break; + case FLOAT: + type.setKind(OrcProto.Type.Kind.FLOAT); + break; + case DOUBLE: + type.setKind(OrcProto.Type.Kind.DOUBLE); + break; + case STRING: + type.setKind(OrcProto.Type.Kind.STRING); + break; + case CHAR: + type.setKind(OrcProto.Type.Kind.CHAR); + type.setMaximumLength(typeDescr.getMaxLength()); + break; + case VARCHAR: + type.setKind(Type.Kind.VARCHAR); + type.setMaximumLength(typeDescr.getMaxLength()); + break; + case BINARY: + type.setKind(OrcProto.Type.Kind.BINARY); + break; + case TIMESTAMP: + type.setKind(OrcProto.Type.Kind.TIMESTAMP); + break; + case DATE: + type.setKind(OrcProto.Type.Kind.DATE); + break; + case DECIMAL: + type.setKind(OrcProto.Type.Kind.DECIMAL); + type.setPrecision(typeDescr.getPrecision()); + type.setScale(typeDescr.getScale()); + break; + case LIST: + type.setKind(OrcProto.Type.Kind.LIST); + type.addSubtypes(++subtype); + result.add(type.build()); + needsAdd = false; + appendOrcTypesRebuildSubtypes(result, children.get(0)); + break; + case MAP: + { + // Make room for MAP type. + result.add(null); + + // Add MAP type pair in order to determine their subtype values. + appendOrcTypesRebuildSubtypes(result, children.get(0)); + int subtype2 = result.size(); + appendOrcTypesRebuildSubtypes(result, children.get(1)); + type.setKind(OrcProto.Type.Kind.MAP); + type.addSubtypes(subtype + 1); + type.addSubtypes(subtype2); + result.set(subtype, type.build()); + needsAdd = false; + } + break; + case STRUCT: + { + List fieldNames = typeDescr.getFieldNames(); + + // Make room for STRUCT type. + result.add(null); + + List fieldSubtypes = new ArrayList(fieldNames.size()); + for(TypeDescription child: children) { + int fieldSubtype = result.size(); + fieldSubtypes.add(fieldSubtype); + appendOrcTypesRebuildSubtypes(result, child); + } + + type.setKind(OrcProto.Type.Kind.STRUCT); + + for (int i = 0 ; i < fieldNames.size(); i++) { + type.addSubtypes(fieldSubtypes.get(i)); + type.addFieldNames(fieldNames.get(i)); + } + result.set(subtype, type.build()); + needsAdd = false; + } + break; + case UNION: + { + // Make room for UNION type. + result.add(null); + + List unionSubtypes = new ArrayList(children.size()); + for(TypeDescription child: children) { + int unionSubtype = result.size(); + unionSubtypes.add(unionSubtype); + appendOrcTypesRebuildSubtypes(result, child); + } + + type.setKind(OrcProto.Type.Kind.UNION); + for (int i = 0 ; i < children.size(); i++) { + type.addSubtypes(unionSubtypes.get(i)); + } + result.set(subtype, type.build()); + needsAdd = false; + } + break; + default: + throw new IllegalArgumentException("Unknown category: " + typeDescr.getCategory()); + } + if (needsAdd) { + result.add(type.build()); + } + } + + /** + * NOTE: This method ignores the subtype numbers in the OrcProto.Type rebuilds the subtype + * numbers based on the length of the result list being appended. 
+ * + * @param result + * @param typeInfo + */ + public static int appendOrcTypesRebuildSubtypes(List result, + List types, int columnId) { + + OrcProto.Type oldType = types.get(columnId++); + + int subtype = result.size(); + OrcProto.Type.Builder builder = OrcProto.Type.newBuilder(); + boolean needsAdd = true; + switch (oldType.getKind()) { + case BOOLEAN: + builder.setKind(OrcProto.Type.Kind.BOOLEAN); + break; + case BYTE: + builder.setKind(OrcProto.Type.Kind.BYTE); + break; + case SHORT: + builder.setKind(OrcProto.Type.Kind.SHORT); + break; + case INT: + builder.setKind(OrcProto.Type.Kind.INT); + break; + case LONG: + builder.setKind(OrcProto.Type.Kind.LONG); + break; + case FLOAT: + builder.setKind(OrcProto.Type.Kind.FLOAT); + break; + case DOUBLE: + builder.setKind(OrcProto.Type.Kind.DOUBLE); + break; + case STRING: + builder.setKind(OrcProto.Type.Kind.STRING); + break; + case CHAR: + builder.setKind(OrcProto.Type.Kind.CHAR); + builder.setMaximumLength(oldType.getMaximumLength()); + break; + case VARCHAR: + builder.setKind(Type.Kind.VARCHAR); + builder.setMaximumLength(oldType.getMaximumLength()); + break; + case BINARY: + builder.setKind(OrcProto.Type.Kind.BINARY); + break; + case TIMESTAMP: + builder.setKind(OrcProto.Type.Kind.TIMESTAMP); + break; + case DATE: + builder.setKind(OrcProto.Type.Kind.DATE); + break; + case DECIMAL: + builder.setKind(OrcProto.Type.Kind.DECIMAL); + builder.setPrecision(oldType.getPrecision()); + builder.setScale(oldType.getScale()); + break; + case LIST: + builder.setKind(OrcProto.Type.Kind.LIST); + builder.addSubtypes(++subtype); + result.add(builder.build()); + needsAdd = false; + columnId = appendOrcTypesRebuildSubtypes(result, types, columnId); + break; + case MAP: + { + // Make room for MAP type. + result.add(null); + + // Add MAP type pair in order to determine their subtype values. + columnId = appendOrcTypesRebuildSubtypes(result, types, columnId); + int subtype2 = result.size(); + columnId = appendOrcTypesRebuildSubtypes(result, types, columnId); + builder.setKind(OrcProto.Type.Kind.MAP); + builder.addSubtypes(subtype + 1); + builder.addSubtypes(subtype2); + result.set(subtype, builder.build()); + needsAdd = false; + } + break; + case STRUCT: + { + List fieldNames = oldType.getFieldNamesList(); + + // Make room for STRUCT type. + result.add(null); + + List fieldSubtypes = new ArrayList(fieldNames.size()); + for(int i = 0 ; i < fieldNames.size(); i++) { + int fieldSubtype = result.size(); + fieldSubtypes.add(fieldSubtype); + columnId = appendOrcTypesRebuildSubtypes(result, types, columnId); + } + + builder.setKind(OrcProto.Type.Kind.STRUCT); + + for (int i = 0 ; i < fieldNames.size(); i++) { + builder.addSubtypes(fieldSubtypes.get(i)); + builder.addFieldNames(fieldNames.get(i)); + } + result.set(subtype, builder.build()); + needsAdd = false; + } + break; + case UNION: + { + int subtypeCount = oldType.getSubtypesCount(); + + // Make room for UNION type. 
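All three compound cases (MAP, STRUCT, UNION) follow the same pattern: reserve the parent's slot with a null, recurse so each child claims the next consecutive ids, then backfill the parent with the ids the recursion handed out. A stripped-down sketch of that pre-order numbering over a toy node type (nothing here is Hive API):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PreOrderIds {
  static class Node {
    final String name;
    final List<Node> children;
    Node(String name, Node... children) {
      this.name = name;
      this.children = Arrays.asList(children);
    }
  }

  // Appends "name[childId, childId, ...]" entries in pre-order, assigning ids
  // by position in the result list, like appendOrcTypesRebuildSubtypes does.
  static void append(List<String> result, Node node) {
    int id = result.size();
    result.add(null); // make room for the parent
    List<Integer> childIds = new ArrayList<>();
    for (Node child : node.children) {
      childIds.add(result.size());
      append(result, child);
    }
    result.set(id, node.name + childIds); // backfill with the children's ids
  }

  public static void main(String[] args) {
    List<String> result = new ArrayList<>();
    append(result, new Node("struct",
        new Node("int"),
        new Node("map", new Node("string"), new Node("int"))));
    System.out.println(result);
    // [struct[1, 2], int[], map[3, 4], string[], int[]]
  }
}
```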
+ result.add(null); + + List unionSubtypes = new ArrayList(subtypeCount); + for(int i = 0 ; i < subtypeCount; i++) { + int unionSubtype = result.size(); + unionSubtypes.add(unionSubtype); + columnId = appendOrcTypesRebuildSubtypes(result, types, columnId); + } + + builder.setKind(OrcProto.Type.Kind.UNION); + for (int i = 0 ; i < subtypeCount; i++) { + builder.addSubtypes(unionSubtypes.get(i)); + } + result.set(subtype, builder.build()); + needsAdd = false; + } + break; + default: + throw new IllegalArgumentException("Unknown category: " + oldType.getKind()); + } + if (needsAdd) { + result.add(builder.build()); + } + return columnId; + } + + public static TypeDescription getDesiredRowTypeDescr(Configuration conf, boolean isAcid) { + + String columnNameProperty = null; + String columnTypeProperty = null; + + ArrayList schemaEvolutionColumnNames = null; + ArrayList schemaEvolutionTypeDescrs = null; + + boolean haveSchemaEvolutionProperties = false; + if (isAcid || HiveConf.getBoolVar(conf, ConfVars.HIVE_SCHEMA_EVOLUTION)) { + + columnNameProperty = conf.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS); + columnTypeProperty = conf.get(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES); + + haveSchemaEvolutionProperties = + (columnNameProperty != null && columnTypeProperty != null); + + if (haveSchemaEvolutionProperties) { + schemaEvolutionColumnNames = Lists.newArrayList(columnNameProperty.split(",")); + if (schemaEvolutionColumnNames.size() == 0) { + haveSchemaEvolutionProperties = false; + } else { + schemaEvolutionTypeDescrs = + OrcUtils.typeDescriptionsFromHiveTypeProperty(columnTypeProperty); + if (schemaEvolutionTypeDescrs.size() != schemaEvolutionColumnNames.size()) { + haveSchemaEvolutionProperties = false; + } + } + } + } + + if (haveSchemaEvolutionProperties) { + LOG.info("Using schema evolution configuration variables " + + "schema.evolution.columns " + + schemaEvolutionColumnNames.toString() + + " / schema.evolution.columns.types " + + schemaEvolutionTypeDescrs.toString() + + " (isAcid " + + isAcid + + ")"); + + } else { + + // Try regular properties; + columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS); + columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES); + if (columnTypeProperty == null || columnNameProperty == null) { + return null; + } + + schemaEvolutionColumnNames = Lists.newArrayList(columnNameProperty.split(",")); + if (schemaEvolutionColumnNames.size() == 0) { + return null; + } + schemaEvolutionTypeDescrs = + OrcUtils.typeDescriptionsFromHiveTypeProperty(columnTypeProperty); + if (schemaEvolutionTypeDescrs.size() != schemaEvolutionColumnNames.size()) { + return null; + } + LOG.info("Using column configuration variables " + + "columns " + + schemaEvolutionColumnNames.toString() + + " / columns.types " + + schemaEvolutionTypeDescrs.toString() + + " (isAcid " + + isAcid + + ")"); + } + + // Desired schema does not include virtual columns or partition columns. 
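Concretely, for a table with columns "id,name" and types "int:string", the loop that follows builds the equivalent of this (a sketch using the TypeDescription builder calls seen earlier in the patch):

```java
import org.apache.hadoop.hive.ql.io.orc.TypeDescription;

public class DesiredSchemaSketch {
  public static void main(String[] args) {
    // What getDesiredRowTypeDescr assembles for "id,name" / "int:string":
    // a struct of just the table's data columns.
    TypeDescription result = TypeDescription.createStruct();
    result.addField("id", TypeDescription.createInt());
    result.addField("name", TypeDescription.createString());
    // Virtual and partition columns are deliberately excluded.
    System.out.println(result.getFieldNames()); // [id, name]
  }
}
```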
+ TypeDescription result = TypeDescription.createStruct(); + for (int i = 0; i < schemaEvolutionColumnNames.size(); i++) { + result.addField(schemaEvolutionColumnNames.get(i), schemaEvolutionTypeDescrs.get(i)); + } + + return result; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java index 8558592..6dbe461 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java @@ -22,6 +22,7 @@ import java.nio.ByteBuffer; import java.util.List; +import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -152,6 +153,7 @@ private boolean[] include; private long offset = 0; private long length = Long.MAX_VALUE; + private TypeDescription schema; private SearchArgument sarg = null; private String[] columnNames = null; @@ -178,6 +180,14 @@ public Options range(long offset, long length) { } /** + * Set the schema on read type description. + */ + public Options schema(TypeDescription schema) { + this.schema = schema; + return this; + } + + /** * Set search argument for predicate push down. * @param sarg the search argument * @param columnNames the column names for @@ -201,6 +211,10 @@ public long getLength() { return length; } + public TypeDescription getSchema() { + return schema; + } + public SearchArgument getSearchArgument() { return sarg; } @@ -222,6 +236,7 @@ public Options clone() { result.include = include; result.offset = offset; result.length = length; + result.schema = schema; result.sarg = sarg; result.columnNames = columnNames; return result; diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java deleted file mode 100644 index 8740ee6..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderFactory.java +++ /dev/null @@ -1,269 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *
- * http://www.apache.org/licenses/LICENSE-2.0 - *
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; - -import com.google.common.collect.Lists; - -/** - * Factory to create ORC tree readers. It also compares file schema with schema specified on read - * to see if type promotions are possible. - */ -public class RecordReaderFactory { - static final Log LOG = LogFactory.getLog(RecordReaderFactory.class); - private static final boolean isLogInfoEnabled = LOG.isInfoEnabled(); - - public static TreeReaderFactory.TreeReader createTreeReader(int colId, - Configuration conf, - List fileSchema, - boolean[] included, - boolean skipCorrupt) throws IOException { - final boolean isAcid = checkAcidSchema(fileSchema); - final List originalFileSchema; - if (isAcid) { - originalFileSchema = fileSchema.subList(fileSchema.get(0).getSubtypesCount(), - fileSchema.size()); - } else { - originalFileSchema = fileSchema; - } - final int numCols = originalFileSchema.get(0).getSubtypesCount(); - List schemaOnRead = getSchemaOnRead(numCols, conf); - List schemaUsed = getMatchingSchema(fileSchema, schemaOnRead); - if (schemaUsed == null) { - return TreeReaderFactory.createTreeReader(colId, fileSchema, included, skipCorrupt); - } else { - return ConversionTreeReaderFactory.createTreeReader(colId, schemaUsed, included, skipCorrupt); - } - } - - private static boolean checkAcidSchema(List fileSchema) { - if (fileSchema.get(0).getKind().equals(OrcProto.Type.Kind.STRUCT)) { - List acidFields = OrcRecordUpdater.getAcidEventFields(); - List rootFields = fileSchema.get(0).getFieldNamesList(); - if (acidFields.equals(rootFields)) { - return true; - } - } - return false; - } - - private static List getMatchingSchema(List fileSchema, - List schemaOnRead) { - if (schemaOnRead == null) { - if (isLogInfoEnabled) { - LOG.info("Schema is not specified on read. Using file schema."); - } - return null; - } - - if (fileSchema.size() != schemaOnRead.size()) { - if (isLogInfoEnabled) { - LOG.info("Schema on read column count does not match file schema's column count." 
+ - " Falling back to using file schema."); - } - return null; - } else { - List result = Lists.newArrayList(fileSchema); - // check type promotion. ORC can only support type promotions for integer types - // short -> int -> bigint as same integer readers are used for the above types. - boolean canPromoteType = false; - for (int i = 0; i < fileSchema.size(); i++) { - OrcProto.Type fColType = fileSchema.get(i); - OrcProto.Type rColType = schemaOnRead.get(i); - if (!fColType.getKind().equals(rColType.getKind())) { - - if (fColType.getKind().equals(OrcProto.Type.Kind.SHORT)) { - - if (rColType.getKind().equals(OrcProto.Type.Kind.INT) || - rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { - // type promotion possible, converting SHORT to INT/LONG requested type - result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build()); - canPromoteType = true; - } else { - canPromoteType = false; - } - - } else if (fColType.getKind().equals(OrcProto.Type.Kind.INT)) { - - if (rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { - // type promotion possible, converting INT to LONG requested type - result.set(i, result.get(i).toBuilder().setKind(rColType.getKind()).build()); - canPromoteType = true; - } else { - canPromoteType = false; - } - - } else { - canPromoteType = false; - } - } - } - - if (canPromoteType) { - if (isLogInfoEnabled) { - LOG.info("Integer type promotion happened in ORC record reader. Using promoted schema."); - } - return result; - } - } - - return null; - } - - private static List getSchemaOnRead(int numCols, Configuration conf) { - String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES); - final String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS); - if (columnTypeProperty == null || columnNameProperty == null) { - return null; - } - - ArrayList columnNames = Lists.newArrayList(columnNameProperty.split(",")); - ArrayList fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - StructTypeInfo structTypeInfo = new StructTypeInfo(); - // Column types from conf includes virtual and partition columns at the end. We consider only - // the actual columns in the file. 
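
The deleted factory logic above and its replacement in SchemaEvolution (added below) implement the same implicit-promotion matrix. Restated as a self-contained sketch with a hypothetical method name:

    // ORC uses the same integer readers for all widths, so widening short -> int -> bigint
    // needs no data conversion; every other kind change would require real conversion.
    static boolean isImplicitOrcPromotion(OrcProto.Type.Kind file, OrcProto.Type.Kind read) {
      if (file == read) {
        return true;
      }
      switch (file) {
        case SHORT:
          return read == OrcProto.Type.Kind.INT || read == OrcProto.Type.Kind.LONG;
        case INT:
          return read == OrcProto.Type.Kind.LONG;
        default:
          return false;
      }
    }
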
- structTypeInfo.setAllStructFieldNames(Lists.newArrayList(columnNames.subList(0, numCols))); - structTypeInfo.setAllStructFieldTypeInfos(Lists.newArrayList(fieldTypes.subList(0, numCols))); - ObjectInspector oi = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(structTypeInfo); - return getOrcTypes(oi); - } - - private static List getOrcTypes(ObjectInspector inspector) { - List result = Lists.newArrayList(); - getOrcTypesImpl(result, inspector); - return result; - } - - private static void getOrcTypesImpl(List result, ObjectInspector inspector) { - OrcProto.Type.Builder type = OrcProto.Type.newBuilder(); - switch (inspector.getCategory()) { - case PRIMITIVE: - switch (((PrimitiveObjectInspector) inspector).getPrimitiveCategory()) { - case BOOLEAN: - type.setKind(OrcProto.Type.Kind.BOOLEAN); - break; - case BYTE: - type.setKind(OrcProto.Type.Kind.BYTE); - break; - case SHORT: - type.setKind(OrcProto.Type.Kind.SHORT); - break; - case INT: - type.setKind(OrcProto.Type.Kind.INT); - break; - case LONG: - type.setKind(OrcProto.Type.Kind.LONG); - break; - case FLOAT: - type.setKind(OrcProto.Type.Kind.FLOAT); - break; - case DOUBLE: - type.setKind(OrcProto.Type.Kind.DOUBLE); - break; - case STRING: - type.setKind(OrcProto.Type.Kind.STRING); - break; - case CHAR: - // The char length needs to be written to file and should be available - // from the object inspector - CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) inspector) - .getTypeInfo(); - type.setKind(OrcProto.Type.Kind.CHAR); - type.setMaximumLength(charTypeInfo.getLength()); - break; - case VARCHAR: - // The varchar length needs to be written to file and should be available - // from the object inspector - VarcharTypeInfo typeInfo = (VarcharTypeInfo) ((PrimitiveObjectInspector) inspector) - .getTypeInfo(); - type.setKind(OrcProto.Type.Kind.VARCHAR); - type.setMaximumLength(typeInfo.getLength()); - break; - case BINARY: - type.setKind(OrcProto.Type.Kind.BINARY); - break; - case TIMESTAMP: - type.setKind(OrcProto.Type.Kind.TIMESTAMP); - break; - case DATE: - type.setKind(OrcProto.Type.Kind.DATE); - break; - case DECIMAL: - DecimalTypeInfo decTypeInfo = (DecimalTypeInfo) ((PrimitiveObjectInspector) inspector) - .getTypeInfo(); - type.setKind(OrcProto.Type.Kind.DECIMAL); - type.setPrecision(decTypeInfo.precision()); - type.setScale(decTypeInfo.scale()); - break; - default: - throw new IllegalArgumentException("Unknown primitive category: " + - ((PrimitiveObjectInspector) inspector).getPrimitiveCategory()); - } - result.add(type.build()); - break; - case LIST: - type.setKind(OrcProto.Type.Kind.LIST); - result.add(type.build()); - getOrcTypesImpl(result, ((ListObjectInspector) inspector).getListElementObjectInspector()); - break; - case MAP: - type.setKind(OrcProto.Type.Kind.MAP); - result.add(type.build()); - getOrcTypesImpl(result, ((MapObjectInspector) inspector).getMapKeyObjectInspector()); - getOrcTypesImpl(result, ((MapObjectInspector) inspector).getMapValueObjectInspector()); - break; - case STRUCT: - type.setKind(OrcProto.Type.Kind.STRUCT); - result.add(type.build()); - for (StructField field : ((StructObjectInspector) inspector).getAllStructFieldRefs()) { - getOrcTypesImpl(result, field.getFieldObjectInspector()); - } - break; - case UNION: - type.setKind(OrcProto.Type.Kind.UNION); - result.add(type.build()); - for (ObjectInspector oi : ((UnionObjectInspector) inspector).getObjectInspectors()) { - getOrcTypesImpl(result, oi); - } - break; - default: - throw new IllegalArgumentException("Unknown category: " 
+ inspector.getCategory()); - } - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index c2d280d..24834a5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -47,6 +47,8 @@ import org.apache.hadoop.hive.ql.io.filters.BloomFilterIO; import org.apache.hadoop.hive.ql.io.orc.RecordReaderUtils.ByteBufferAllocatorPool; import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory.TreeReader; +import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type; +import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory.TreeReaderSchema; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; @@ -154,15 +156,27 @@ static int findColumns(String[] columnNames, } protected RecordReaderImpl(List stripes, - FileSystem fileSystem, - Path path, - Reader.Options options, - List types, - CompressionCodec codec, - int bufferSize, - long strideRate, - Configuration conf - ) throws IOException { + FileSystem fileSystem, + Path path, + Reader.Options options, + List types, + CompressionCodec codec, + int bufferSize, + long strideRate, + Configuration conf + ) throws IOException { + + TreeReaderSchema treeReaderSchema; + if (options.getSchema() == null) { + treeReaderSchema = new TreeReaderSchema().fileTypes(types).schemaTypes(types); + } else { + + // Now that we are creating a record reader for a file, validate that the schema to read + // is compatible with the file schema. + // + List schemaTypes = OrcUtils.getOrcTypes(options.getSchema()); + treeReaderSchema = SchemaEvolution.validateAndCreate(types, schemaTypes); + } this.path = path; this.file = fileSystem.open(path); this.codec = codec; @@ -200,7 +214,7 @@ protected RecordReaderImpl(List stripes, firstRow = skippedRows; totalRowCount = rows; boolean skipCorrupt = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_ORC_SKIP_CORRUPT_DATA); - reader = RecordReaderFactory.createTreeReader(0, conf, types, included, skipCorrupt); + reader = TreeReaderFactory.createTreeReader(0, treeReaderSchema, included, skipCorrupt); indexes = new OrcProto.RowIndex[types.size()]; bloomFilterIndices = new OrcProto.BloomFilterIndex[types.size()]; advanceToNextRow(reader, 0L, true); @@ -1085,6 +1099,7 @@ public VectorizedRowBatch nextBatch(VectorizedRowBatch previous) throws IOExcept } else { result = (VectorizedRowBatch) previous; result.selectedInUse = false; + reader.setVectorColumnCount(result.getDataColumnCount()); reader.nextVector(result.cols, (int) batchSize); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/SchemaEvolution.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/SchemaEvolution.java new file mode 100644 index 0000000..9d00eb2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/SchemaEvolution.java @@ -0,0 +1,185 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.orc; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type; +import org.apache.hadoop.hive.ql.io.orc.TreeReaderFactory.TreeReaderSchema; + +/** + * Take the file types and the (optional) configuration column names/types and see if there + * has been schema evolution. + */ +public class SchemaEvolution { + + private static final Log LOG = LogFactory.getLog(SchemaEvolution.class); + + public static TreeReaderSchema validateAndCreate(List fileTypes, + List schemaTypes) throws IOException { + + // For ACID, the row is the ROW field in the outer STRUCT. + final boolean isAcid = checkAcidSchema(fileTypes); + final List rowSchema; + int rowSubtype; + if (isAcid) { + rowSubtype = OrcRecordUpdater.ROW + 1; + rowSchema = fileTypes.subList(rowSubtype, fileTypes.size()); + } else { + rowSubtype = 0; + rowSchema = fileTypes; + } + + // Do checking on the overlap. Additional columns will be defaulted to NULL. + + int numFileColumns = rowSchema.get(0).getSubtypesCount(); + int numDesiredColumns = schemaTypes.get(0).getSubtypesCount(); + + int numReadColumns = Math.min(numFileColumns, numDesiredColumns); + + /** + * Check type promotion. + * + * Currently, we only support integer type promotions that can be done "implicitly". + * That is, we know that using a bigger integer tree reader on the original smaller integer + * column will "just work". + * + * In the future, other type promotions might require type conversion. + */ + // short -> int -> bigint as same integer readers are used for the above types. + + for (int i = 0; i < numReadColumns; i++) { + OrcProto.Type fColType = fileTypes.get(rowSubtype + i); + OrcProto.Type rColType = schemaTypes.get(i); + if (!fColType.getKind().equals(rColType.getKind())) { + + boolean ok = false; + if (fColType.getKind().equals(OrcProto.Type.Kind.SHORT)) { + + if (rColType.getKind().equals(OrcProto.Type.Kind.INT) || + rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { + // type promotion possible, converting SHORT to INT/LONG requested type + ok = true; + } + } else if (fColType.getKind().equals(OrcProto.Type.Kind.INT)) { + + if (rColType.getKind().equals(OrcProto.Type.Kind.LONG)) { + // type promotion possible, converting INT to LONG requested type + ok = true; + } + } + + if (!ok) { + throw new IOException("ORC does not support type conversion from " + + fColType.getKind().name() + " to " + rColType.getKind().name()); + } + } + } + + List fullSchemaTypes; + + if (isAcid) { + fullSchemaTypes = new ArrayList(); + + // This copies the ACID struct type which is subtype = 0. + // It has field names "operation" through "row". + // And we copy the types for all fields EXCEPT ROW (which must be last!). + + for (int i = 0; i < rowSubtype; i++) { + fullSchemaTypes.add(fileTypes.get(i).toBuilder().build()); + } + + // Add the row struct type. 
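
The append on the next line rebuilds the row subtree with fresh subtype ids. As a hedged example, for a hypothetical row schema struct<name:string>, createEventSchema (defined below) yields this flattened layout:

    List<OrcProto.Type> eventTypes = SchemaEvolution.createEventSchema(
        TypeDescription.createStruct().addField("name", TypeDescription.createString()));
    // id 0: struct<operation,originalTransaction,bucket,rowId,currentTransaction,row>
    // id 1: int (operation)            id 2: long (originalTransaction)
    // id 3: int (bucket)               id 4: long (rowId)
    // id 5: long (currentTransaction)  id 6: struct (row)
    // id 7: string (name)
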
+ OrcUtils.appendOrcTypesRebuildSubtypes(fullSchemaTypes, schemaTypes, 0); + } else { + fullSchemaTypes = schemaTypes; + } + + int innerStructSubtype = rowSubtype; + + // LOG.info("Schema evolution: (fileTypes) " + fileTypes.toString() + + // " (schemaEvolutionTypes) " + schemaEvolutionTypes.toString()); + + return new TreeReaderSchema(). + fileTypes(fileTypes). + schemaTypes(fullSchemaTypes). + innerStructSubtype(innerStructSubtype); + } + + private static boolean checkAcidSchema(List fileSchema) { + if (fileSchema.get(0).getKind().equals(OrcProto.Type.Kind.STRUCT)) { + List rootFields = fileSchema.get(0).getFieldNamesList(); + if (acidEventFieldNames.equals(rootFields)) { + return true; + } + } + return false; + } + + /** + * @param typeDescr + * @return ORC types for the ACID event based on the row's type description + */ + public static List createEventSchema(TypeDescription typeDescr) { + + List result = new ArrayList(); + + OrcProto.Type.Builder type = OrcProto.Type.newBuilder(); + type.setKind(OrcProto.Type.Kind.STRUCT); + type.addAllFieldNames(acidEventFieldNames); + for (int i = 0; i < acidEventFieldNames.size(); i++) { + type.addSubtypes(i + 1); + } + result.add(type.build()); + + // Automatically add all fields except the last (ROW). + for (int i = 0; i < acidEventOrcTypeKinds.size() - 1; i ++) { + type.clear(); + type.setKind(acidEventOrcTypeKinds.get(i)); + result.add(type.build()); + } + + OrcUtils.appendOrcTypesRebuildSubtypes(result, typeDescr); + return result; + } + + public static final List acidEventFieldNames= new ArrayList(); + static { + acidEventFieldNames.add("operation"); + acidEventFieldNames.add("originalTransaction"); + acidEventFieldNames.add("bucket"); + acidEventFieldNames.add("rowId"); + acidEventFieldNames.add("currentTransaction"); + acidEventFieldNames.add("row"); + } + public static final List acidEventOrcTypeKinds = + new ArrayList(); + static { + acidEventOrcTypeKinds.add(OrcProto.Type.Kind.INT); + acidEventOrcTypeKinds.add(OrcProto.Type.Kind.LONG); + acidEventOrcTypeKinds.add(OrcProto.Type.Kind.INT); + acidEventOrcTypeKinds.add(OrcProto.Type.Kind.LONG); + acidEventOrcTypeKinds.add(OrcProto.Type.Kind.LONG); + acidEventOrcTypeKinds.add(OrcProto.Type.Kind.STRUCT); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java index ecd9b14..22f61ee 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java @@ -30,6 +30,9 @@ import java.util.Map; import java.util.TimeZone; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; @@ -50,6 +53,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.shims.HadoopShims.TextReaderShim; +import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -62,10 +66,64 @@ */ public class TreeReaderFactory { - protected abstract static class TreeReader { + private static final Log LOG = LogFactory.getLog(TreeReaderFactory.class); + + public static class TreeReaderSchema { + + /** + * The types in 
the ORC file. + */ + List fileTypes; + + /** + * The treeReaderSchema that the reader should read as. + */ + List schemaTypes; + + /** + * The subtype of the row STRUCT. Different than 0 for ACID. + */ + int innerStructSubtype; + + public TreeReaderSchema() { + fileTypes = null; + schemaTypes = null; + innerStructSubtype = -1; + } + + public TreeReaderSchema fileTypes(List fileTypes) { + this.fileTypes = fileTypes; + return this; + } + + public TreeReaderSchema schemaTypes(List schemaTypes) { + this.schemaTypes = schemaTypes; + return this; + } + + public TreeReaderSchema innerStructSubtype(int innerStructSubtype) { + this.innerStructSubtype = innerStructSubtype; + return this; + } + + public List getFileTypes() { + return fileTypes; + } + + public List getSchemaTypes() { + return schemaTypes; + } + + public int getInnerStructSubtype() { + return innerStructSubtype; + } + } + + public abstract static class TreeReader { protected final int columnId; protected BitFieldReader present = null; protected boolean valuePresent = false; + protected int vectorColumnCount; TreeReader(int columnId) throws IOException { this(columnId, null); @@ -79,6 +137,11 @@ } else { present = new BitFieldReader(in, 1); } + vectorColumnCount = -1; + } + + void setVectorColumnCount(int vectorColumnCount) { + this.vectorColumnCount = vectorColumnCount; } void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException { @@ -1947,24 +2010,56 @@ public Object nextVector(Object previousVector, long batchSize) throws IOExcepti } protected static class StructTreeReader extends TreeReader { + private final int fileColumnCount; + private final int resultColumnCount; protected final TreeReader[] fields; private final String[] fieldNames; - StructTreeReader(int columnId, - List types, + protected StructTreeReader( + int columnId, + TreeReaderSchema treeReaderSchema, boolean[] included, boolean skipCorrupt) throws IOException { super(columnId); - OrcProto.Type type = types.get(columnId); - int fieldCount = type.getFieldNamesCount(); - this.fields = new TreeReader[fieldCount]; - this.fieldNames = new String[fieldCount]; - for (int i = 0; i < fieldCount; ++i) { - int subtype = type.getSubtypes(i); - if (included == null || included[subtype]) { - this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt); + + OrcProto.Type fileStructType = treeReaderSchema.getFileTypes().get(columnId); + fileColumnCount = fileStructType.getFieldNamesCount(); + + OrcProto.Type schemaStructType = treeReaderSchema.getSchemaTypes().get(columnId); + + if (columnId == treeReaderSchema.getInnerStructSubtype()) { + // If there are more result columns than reader columns, we will default those additional + // columns to NULL. + resultColumnCount = schemaStructType.getFieldNamesCount(); + } else { + resultColumnCount = fileColumnCount; + } + + this.fields = new TreeReader[fileColumnCount]; + this.fieldNames = new String[fileColumnCount]; + + if (included == null) { + for (int i = 0; i < fileColumnCount; ++i) { + int subtype = schemaStructType.getSubtypes(i); + this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt); + // Use the treeReaderSchema evolution name since file/reader types may not have the real column name. 
+ this.fieldNames[i] = schemaStructType.getFieldNames(i); + } + } else { + for (int i = 0; i < fileColumnCount; ++i) { + int subtype = schemaStructType.getSubtypes(i); + if (subtype >= included.length) { + throw new IOException("subtype " + subtype + " exceeds the included array size " + + included.length + " fileTypes " + treeReaderSchema.getFileTypes().toString() + + " schemaTypes " + treeReaderSchema.getSchemaTypes().toString() + + " innerStructSubtype " + treeReaderSchema.getInnerStructSubtype()); + } + if (included[subtype]) { + this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt); + } + // Use the treeReaderSchema evolution name since file/reader types may not have the real column name. + this.fieldNames[i] = schemaStructType.getFieldNames(i); } - this.fieldNames[i] = type.getFieldNames(i); } } @@ -1984,22 +2079,28 @@ Object next(Object previous) throws IOException { OrcStruct result = null; if (valuePresent) { if (previous == null) { - result = new OrcStruct(fields.length); + result = new OrcStruct(resultColumnCount); } else { result = (OrcStruct) previous; // If the input format was initialized with a file with a // different number of fields, the number of fields needs to // be updated to the correct number - if (result.getNumFields() != fields.length) { - result.setNumFields(fields.length); + if (result.getNumFields() != resultColumnCount) { + result.setNumFields(resultColumnCount); } } - for (int i = 0; i < fields.length; ++i) { + for (int i = 0; i < fileColumnCount; ++i) { if (fields[i] != null) { result.setFieldValue(i, fields[i].next(result.getFieldValue(i))); } } + if (resultColumnCount > fileColumnCount) { + for (int i = fileColumnCount; i < resultColumnCount; ++i) { + // Default new treeReaderSchema evolution fields to NULL. + result.setFieldValue(i, null); + } + } } return result; } @@ -2008,13 +2109,13 @@ Object next(Object previous) throws IOException { public Object nextVector(Object previousVector, long batchSize) throws IOException { final ColumnVector[] result; if (previousVector == null) { - result = new ColumnVector[fields.length]; + result = new ColumnVector[fileColumnCount]; } else { result = (ColumnVector[]) previousVector; } // Read all the members of struct as column vectors - for (int i = 0; i < fields.length; i++) { + for (int i = 0; i < fileColumnCount; i++) { if (fields[i] != null) { if (result[i] == null) { result[i] = (ColumnVector) fields[i].nextVector(null, batchSize); @@ -2023,6 +2124,19 @@ public Object nextVector(Object previousVector, long batchSize) throws IOExcepti } } } + + // Default additional treeReaderSchema evolution fields to NULL. 
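
What this NULL defaulting means in practice, for both the next() path above and the nextVector() path that follows; the table history and the recordReader variable are hypothetical:

    // File written while the table was struct<a:int,b:string>; the table has since been
    // altered to struct<a:int,b:string,c:bigint>, so fileColumnCount == 2 and
    // resultColumnCount == 3 for the row struct.
    OrcStruct row = (OrcStruct) recordReader.next(null);
    // row.getNumFields() == 3; fields 0 and 1 come from the file, field 2 is null.
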
+ if (vectorColumnCount != -1 && vectorColumnCount > fileColumnCount) { + for (int i = fileColumnCount; i < vectorColumnCount; ++i) { + ColumnVector colVector = result[i]; + if (colVector != null) { + colVector.isRepeating = true; + colVector.noNulls = false; + colVector.isNull[0] = true; + } + } + } + return result; } @@ -2053,18 +2167,18 @@ void skipRows(long items) throws IOException { protected final TreeReader[] fields; protected RunLengthByteReader tags; - UnionTreeReader(int columnId, - List types, + protected UnionTreeReader(int columnId, + TreeReaderSchema treeReaderSchema, boolean[] included, boolean skipCorrupt) throws IOException { super(columnId); - OrcProto.Type type = types.get(columnId); + OrcProto.Type type = treeReaderSchema.getSchemaTypes().get(columnId); int fieldCount = type.getSubtypesCount(); this.fields = new TreeReader[fieldCount]; for (int i = 0; i < fieldCount; ++i) { int subtype = type.getSubtypes(i); if (included == null || included[subtype]) { - this.fields[i] = createTreeReader(subtype, types, included, skipCorrupt); + this.fields[i] = createTreeReader(subtype, treeReaderSchema, included, skipCorrupt); } } } @@ -2133,13 +2247,13 @@ void skipRows(long items) throws IOException { protected final TreeReader elementReader; protected IntegerReader lengths = null; - ListTreeReader(int columnId, - List types, + protected ListTreeReader(int columnId, + TreeReaderSchema treeReaderSchema, boolean[] included, boolean skipCorrupt) throws IOException { super(columnId); - OrcProto.Type type = types.get(columnId); - elementReader = createTreeReader(type.getSubtypes(0), types, included, skipCorrupt); + OrcProto.Type type = treeReaderSchema.getSchemaTypes().get(columnId); + elementReader = createTreeReader(type.getSubtypes(0), treeReaderSchema, included, skipCorrupt); } @Override @@ -2223,21 +2337,21 @@ void skipRows(long items) throws IOException { protected final TreeReader valueReader; protected IntegerReader lengths = null; - MapTreeReader(int columnId, - List types, + protected MapTreeReader(int columnId, + TreeReaderSchema treeReaderSchema, boolean[] included, boolean skipCorrupt) throws IOException { super(columnId); - OrcProto.Type type = types.get(columnId); + OrcProto.Type type = treeReaderSchema.getSchemaTypes().get(columnId); int keyColumn = type.getSubtypes(0); int valueColumn = type.getSubtypes(1); if (included == null || included[keyColumn]) { - keyReader = createTreeReader(keyColumn, types, included, skipCorrupt); + keyReader = createTreeReader(keyColumn, treeReaderSchema, included, skipCorrupt); } else { keyReader = null; } if (included == null || included[valueColumn]) { - valueReader = createTreeReader(valueColumn, types, included, skipCorrupt); + valueReader = createTreeReader(valueColumn, treeReaderSchema, included, skipCorrupt); } else { valueReader = null; } @@ -2317,11 +2431,11 @@ void skipRows(long items) throws IOException { } public static TreeReader createTreeReader(int columnId, - List types, + TreeReaderSchema treeReaderSchema, boolean[] included, boolean skipCorrupt ) throws IOException { - OrcProto.Type type = types.get(columnId); + OrcProto.Type type = treeReaderSchema.getSchemaTypes().get(columnId); switch (type.getKind()) { case BOOLEAN: return new BooleanTreeReader(columnId); @@ -2361,13 +2475,13 @@ public static TreeReader createTreeReader(int columnId, int scale = type.hasScale() ? 
type.getScale() : HiveDecimal.SYSTEM_DEFAULT_SCALE; return new DecimalTreeReader(columnId, precision, scale); case STRUCT: - return new StructTreeReader(columnId, types, included, skipCorrupt); + return new StructTreeReader(columnId, treeReaderSchema, included, skipCorrupt); case LIST: - return new ListTreeReader(columnId, types, included, skipCorrupt); + return new ListTreeReader(columnId, treeReaderSchema, included, skipCorrupt); case MAP: - return new MapTreeReader(columnId, types, included, skipCorrupt); + return new MapTreeReader(columnId, treeReaderSchema, included, skipCorrupt); case UNION: - return new UnionTreeReader(columnId, types, included, skipCorrupt); + return new UnionTreeReader(columnId, treeReaderSchema, included, skipCorrupt); default: throw new IllegalArgumentException("Unsupported type " + type.getKind()); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java new file mode 100644 index 0000000..3c0d590 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/TypeDescription.java @@ -0,0 +1,514 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.orc; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * This is the description of the types in an ORC file. 
+ */ +public class TypeDescription { + private static final int MAX_PRECISION = 38; + private static final int MAX_SCALE = 38; + private static final int DEFAULT_PRECISION = 38; + private static final int DEFAULT_SCALE = 10; + private static final int DEFAULT_LENGTH = 256; + public enum Category { + BOOLEAN("boolean", true), + BYTE("tinyint", true), + SHORT("smallint", true), + INT("int", true), + LONG("bigint", true), + FLOAT("float", true), + DOUBLE("double", true), + STRING("string", true), + DATE("date", true), + TIMESTAMP("timestamp", true), + BINARY("binary", true), + DECIMAL("decimal", true), + VARCHAR("varchar", true), + CHAR("char", true), + LIST("array", false), + MAP("map", false), + STRUCT("struct", false), + UNION("union", false); + + Category(String name, boolean isPrimitive) { + this.name = name; + this.isPrimitive = isPrimitive; + } + + final boolean isPrimitive; + final String name; + + public boolean isPrimitive() { + return isPrimitive; + } + + public String getName() { + return name; + } + } + + public static TypeDescription createBoolean() { + return new TypeDescription(Category.BOOLEAN); + } + + public static TypeDescription createByte() { + return new TypeDescription(Category.BYTE); + } + + public static TypeDescription createShort() { + return new TypeDescription(Category.SHORT); + } + + public static TypeDescription createInt() { + return new TypeDescription(Category.INT); + } + + public static TypeDescription createLong() { + return new TypeDescription(Category.LONG); + } + + public static TypeDescription createFloat() { + return new TypeDescription(Category.FLOAT); + } + + public static TypeDescription createDouble() { + return new TypeDescription(Category.DOUBLE); + } + + public static TypeDescription createString() { + return new TypeDescription(Category.STRING); + } + + public static TypeDescription createDate() { + return new TypeDescription(Category.DATE); + } + + public static TypeDescription createTimestamp() { + return new TypeDescription(Category.TIMESTAMP); + } + + public static TypeDescription createBinary() { + return new TypeDescription(Category.BINARY); + } + + public static TypeDescription createDecimal() { + return new TypeDescription(Category.DECIMAL); + } + + /** + * For decimal types, set the precision. + * @param precision the new precision + * @return this + */ + public TypeDescription withPrecision(int precision) { + if (category != Category.DECIMAL) { + throw new IllegalArgumentException("precision is only allowed on decimal"+ + " and not " + category.name); + } else if (precision < 1 || precision > MAX_PRECISION || scale > precision){ + throw new IllegalArgumentException("precision " + precision + + " is out of range 1 .. " + scale); + } + this.precision = precision; + return this; + } + + /** + * For decimal types, set the scale. + * @param scale the new scale + * @return this + */ + public TypeDescription withScale(int scale) { + if (category != Category.DECIMAL) { + throw new IllegalArgumentException("scale is only allowed on decimal"+ + " and not " + category.name); + } else if (scale < 0 || scale > MAX_SCALE || scale > precision) { + throw new IllegalArgumentException("scale is out of range at " + scale); + } + this.scale = scale; + return this; + } + + public static TypeDescription createVarchar() { + return new TypeDescription(Category.VARCHAR); + } + + public static TypeDescription createChar() { + return new TypeDescription(Category.CHAR); + } + + /** + * Set the maximum length for char and varchar types. 
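
A hedged note on the decimal builders above: each setter validates against the other value (the defaults are precision 38, scale 10), so narrow precision before scale when reducing both:

    TypeDescription dec = TypeDescription.createDecimal().withPrecision(10).withScale(2);
    // dec.toString() -> "decimal(10,2)"; calling withScale(12) after withPrecision(10)
    // would throw, since scale may not exceed precision.
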
+ * @param maxLength the maximum value + * @return this + */ + public TypeDescription withMaxLength(int maxLength) { + if (category != Category.VARCHAR && category != Category.CHAR) { + throw new IllegalArgumentException("maxLength is only allowed on char" + + " and varchar and not " + category.name); + } + this.maxLength = maxLength; + return this; + } + + public static TypeDescription createList(TypeDescription childType) { + TypeDescription result = new TypeDescription(Category.LIST); + result.children.add(childType); + childType.parent = result; + return result; + } + + public static TypeDescription createMap(TypeDescription keyType, + TypeDescription valueType) { + TypeDescription result = new TypeDescription(Category.MAP); + result.children.add(keyType); + result.children.add(valueType); + keyType.parent = result; + valueType.parent = result; + return result; + } + + public static TypeDescription createUnion() { + return new TypeDescription(Category.UNION); + } + + public static TypeDescription createStruct() { + return new TypeDescription(Category.STRUCT); + } + + /** + * Add a child to a union type. + * @param child a new child type to add + * @return the union type. + */ + public TypeDescription addUnionChild(TypeDescription child) { + if (category != Category.UNION) { + throw new IllegalArgumentException("Can only add types to union type" + + " and not " + category); + } + children.add(child); + child.parent = this; + return this; + } + + /** + * Add a field to a struct type as it is built. + * @param field the field name + * @param fieldType the type of the field + * @return the struct type + */ + public TypeDescription addField(String field, TypeDescription fieldType) { + if (category != Category.STRUCT) { + throw new IllegalArgumentException("Can only add fields to struct type" + + " and not " + category); + } + fieldNames.add(field); + children.add(fieldType); + fieldType.parent = this; + return this; + } + + /** + * Get the id for this type. + * The first call will cause all of the the ids in tree to be assigned, so + * it should not be called before the type is completely built. + * @return the sequential id + */ + public int getId() { + // if the id hasn't been assigned, assign all of the ids from the root + if (id == -1) { + TypeDescription root = this; + while (root.parent != null) { + root = root.parent; + } + root.assignIds(0); + } + return id; + } + + /** + * Get the maximum id assigned to this type or its children. + * The first call will cause all of the the ids in tree to be assigned, so + * it should not be called before the type is completely built. 
+ * @return the maximum id assigned under this type + */ + public int getMaximumId() { + // if the id hasn't been assigned, assign all of the ids from the root + if (maxId == -1) { + TypeDescription root = this; + while (root.parent != null) { + root = root.parent; + } + root.assignIds(0); + } + return maxId; + } + + private ColumnVector createColumn() { + switch (category) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case TIMESTAMP: + case DATE: + return new LongColumnVector(); + case FLOAT: + case DOUBLE: + return new DoubleColumnVector(); + case DECIMAL: + return new DecimalColumnVector(precision, scale); + case STRING: + case BINARY: + case CHAR: + case VARCHAR: + return new BytesColumnVector(); + default: + throw new IllegalArgumentException("Unknown type " + category); + } + } + + public VectorizedRowBatch createRowBatch() { + VectorizedRowBatch result; + if (category == Category.STRUCT) { + result = new VectorizedRowBatch(children.size(), + VectorizedRowBatch.DEFAULT_SIZE); + for(int i=0; i < result.cols.length; ++i) { + result.cols[i] = children.get(i).createColumn(); + } + } else { + result = new VectorizedRowBatch(1, VectorizedRowBatch.DEFAULT_SIZE); + result.cols[0] = createColumn(); + } + result.reset(); + return result; + } + + /** + * Get the kind of this type. + * @return get the category for this type. + */ + public Category getCategory() { + return category; + } + + /** + * Get the maximum length of the type. Only used for char and varchar types. + * @return the maximum length of the string type + */ + public int getMaxLength() { + return maxLength; + } + + /** + * Get the precision of the decimal type. + * @return the number of digits for the precision. + */ + public int getPrecision() { + return precision; + } + + /** + * Get the scale of the decimal type. + * @return the number of digits for the scale. + */ + public int getScale() { + return scale; + } + + /** + * For struct types, get the list of field names. + * @return the list of field names. + */ + public List getFieldNames() { + return Collections.unmodifiableList(fieldNames); + } + + /** + * Get the subtypes of this type. + * @return the list of children types + */ + public List getChildren() { + return children == null ? null : Collections.unmodifiableList(children); + } + + /** + * Assign ids to all of the nodes under this one. 
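
A hedged usage sketch for the batch factory above; the schema is hypothetical:

    TypeDescription schema = TypeDescription.createStruct()
        .addField("v", TypeDescription.createVarchar().withMaxLength(50))
        .addField("d", TypeDescription.createDouble());
    VectorizedRowBatch batch = schema.createRowBatch();
    // batch has two columns of VectorizedRowBatch.DEFAULT_SIZE rows:
    // cols[0] is a BytesColumnVector, cols[1] is a DoubleColumnVector.
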
+ * @param startId the lowest id to assign + * @return the next available id + */ + private int assignIds(int startId) { + id = startId++; + if (children != null) { + for (TypeDescription child : children) { + startId = child.assignIds(startId); + } + } + maxId = startId - 1; + return startId; + } + + private TypeDescription(Category category) { + this.category = category; + if (category.isPrimitive) { + children = null; + } else { + children = new ArrayList<>(); + } + if (category == Category.STRUCT) { + fieldNames = new ArrayList<>(); + } else { + fieldNames = null; + } + } + + private int id = -1; + private int maxId = -1; + private TypeDescription parent; + private final Category category; + private final List children; + private final List fieldNames; + private int maxLength = DEFAULT_LENGTH; + private int precision = DEFAULT_PRECISION; + private int scale = DEFAULT_SCALE; + + public void printToBuffer(StringBuilder buffer) { + buffer.append(category.name); + switch (category) { + case DECIMAL: + buffer.append('('); + buffer.append(precision); + buffer.append(','); + buffer.append(scale); + buffer.append(')'); + break; + case CHAR: + case VARCHAR: + buffer.append('('); + buffer.append(maxLength); + buffer.append(')'); + break; + case LIST: + case MAP: + case UNION: + buffer.append('<'); + for(int i=0; i < children.size(); ++i) { + if (i != 0) { + buffer.append(','); + } + children.get(i).printToBuffer(buffer); + } + buffer.append('>'); + break; + case STRUCT: + buffer.append('<'); + for(int i=0; i < children.size(); ++i) { + if (i != 0) { + buffer.append(','); + } + buffer.append(fieldNames.get(i)); + buffer.append(':'); + children.get(i).printToBuffer(buffer); + } + buffer.append('>'); + break; + default: + break; + } + } + + public String toString() { + StringBuilder buffer = new StringBuilder(); + printToBuffer(buffer); + return buffer.toString(); + } + + private void printJsonToBuffer(String prefix, StringBuilder buffer, + int indent) { + for(int i=0; i < indent; ++i) { + buffer.append(' '); + } + buffer.append(prefix); + buffer.append("{\"category\": \""); + buffer.append(category.name); + buffer.append("\", \"id\": "); + buffer.append(getId()); + buffer.append(", \"max\": "); + buffer.append(maxId); + switch (category) { + case DECIMAL: + buffer.append(", \"precision\": "); + buffer.append(precision); + buffer.append(", \"scale\": "); + buffer.append(scale); + break; + case CHAR: + case VARCHAR: + buffer.append(", \"length\": "); + buffer.append(maxLength); + break; + case LIST: + case MAP: + case UNION: + buffer.append(", \"children\": ["); + for(int i=0; i < children.size(); ++i) { + buffer.append('\n'); + children.get(i).printJsonToBuffer("", buffer, indent + 2); + if (i != children.size() - 1) { + buffer.append(','); + } + } + buffer.append("]"); + break; + case STRUCT: + buffer.append(", \"fields\": ["); + for(int i=0; i < children.size(); ++i) { + buffer.append('\n'); + children.get(i).printJsonToBuffer("\"" + fieldNames.get(i) + "\": ", + buffer, indent + 2); + if (i != children.size() - 1) { + buffer.append(','); + } + } + buffer.append(']'); + break; + default: + break; + } + buffer.append('}'); + } + + public String toJson() { + StringBuilder buffer = new StringBuilder(); + printJsonToBuffer("", buffer, 0); + return buffer.toString(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java index a8e5c2e..a2725b2 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java @@ -25,7 +25,6 @@ import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.DataOutputBuffer; @@ -46,31 +45,25 @@ private final AcidInputFormat.RowReader innerReader; private final RecordIdentifier key; private final OrcStruct value; - private final VectorizedRowBatchCtx rowBatchCtx; + private VectorizedRowBatchCtx rbCtx; + private Object[] partitionValues; private final ObjectInspector objectInspector; private final DataOutputBuffer buffer = new DataOutputBuffer(); VectorizedOrcAcidRowReader(AcidInputFormat.RowReader inner, Configuration conf, + VectorizedRowBatchCtx vectorizedRowBatchCtx, FileSplit split) throws IOException { this.innerReader = inner; this.key = inner.createKey(); - this.rowBatchCtx = new VectorizedRowBatchCtx(); + rbCtx = vectorizedRowBatchCtx; + int partitionColumnCount = rbCtx.getPartitionColumnCount(); + if (partitionColumnCount > 0) { + partitionValues = new Object[partitionColumnCount]; + rbCtx.getPartitionValues(rbCtx, conf, split, partitionValues); + } this.value = inner.createValue(); this.objectInspector = inner.getObjectInspector(); - try { - rowBatchCtx.init(conf, split); - } catch (ClassNotFoundException e) { - throw new IOException("Failed to initialize context", e); - } catch (SerDeException e) { - throw new IOException("Failed to initialize context", e); - } catch (InstantiationException e) { - throw new IOException("Failed to initialize context", e); - } catch (IllegalAccessException e) { - throw new IOException("Failed to initialize context", e); - } catch (HiveException e) { - throw new IOException("Failed to initialize context", e); - } } @Override @@ -82,23 +75,21 @@ public boolean next(NullWritable nullWritable, if (!innerReader.next(key, value)) { return false; } - try { - rowBatchCtx.addPartitionColsToBatch(vectorizedRowBatch); - } catch (HiveException e) { - throw new IOException("Problem adding partition column", e); + if (partitionValues != null) { + rbCtx.addPartitionColsToBatch(vectorizedRowBatch, partitionValues); } try { VectorizedBatchUtil.acidAddRowToBatch(value, (StructObjectInspector) objectInspector, - vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer); + vectorizedRowBatch.size++, vectorizedRowBatch, rbCtx, buffer); while (vectorizedRowBatch.size < vectorizedRowBatch.selected.length && innerReader.next(key, value)) { VectorizedBatchUtil.acidAddRowToBatch(value, (StructObjectInspector) objectInspector, - vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer); + vectorizedRowBatch.size++, vectorizedRowBatch, rbCtx, buffer); } - } catch (HiveException he) { - throw new IOException("error iterating", he); + } catch (Exception e) { + throw new IOException("error iterating", e); } return true; } @@ -110,11 +101,7 @@ public NullWritable createKey() { @Override public VectorizedRowBatch createValue() { - try { - return rowBatchCtx.createVectorizedRowBatch(); - } catch (HiveException e) { - throw new RuntimeException("Error creating a batch", e); - } + return rbCtx.createVectorizedRowBatch(); } @Override diff --git 
ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java index bf09001..7a2799d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.io.orc; import java.io.IOException; -import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -27,14 +26,12 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.InputFormatChecker; -import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.io.SelfDescribingInputFormatInterface; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileSplit; @@ -47,7 +44,8 @@ * A MapReduce/Hive input format for ORC files. */ public class VectorizedOrcInputFormat extends FileInputFormat - implements InputFormatChecker, VectorizedInputFormatInterface { + implements InputFormatChecker, VectorizedInputFormatInterface, + SelfDescribingInputFormatInterface { static class VectorizedOrcRecordReader implements RecordReader { @@ -56,12 +54,21 @@ private final long length; private float progress = 0.0f; private VectorizedRowBatchCtx rbCtx; + private final boolean[] columnsToIncludeTruncated; + private final Object[] partitionValues; private boolean addPartitionCols = true; VectorizedOrcRecordReader(Reader file, Configuration conf, FileSplit fileSplit) throws IOException { + + /** + * Do we have schema on read in the configuration variables? 
+ */ + TypeDescription schema = OrcUtils.getDesiredRowTypeDescr(conf, /* isAcid */ false); + List types = file.getTypes(); Reader.Options options = new Reader.Options(); + options.schema(schema); this.offset = fileSplit.getStart(); this.length = fileSplit.getLength(); options.range(offset, length); @@ -69,11 +76,17 @@ OrcInputFormat.setSearchArgument(options, types, conf, true); this.reader = file.rowsOptions(options); - try { - rbCtx = new VectorizedRowBatchCtx(); - rbCtx.init(conf, fileSplit); - } catch (Exception e) { - throw new RuntimeException(e); + + rbCtx = Utilities.getVectorizedRowBatchCtx(conf); + + columnsToIncludeTruncated = rbCtx.getColumnsToIncludeTruncated(conf); + + int partitionColumnCount = rbCtx.getPartitionColumnCount(); + if (partitionColumnCount > 0) { + partitionValues = new Object[partitionColumnCount]; + rbCtx.getPartitionValues(rbCtx, conf, fileSplit, partitionValues); + } else { + partitionValues = null; } } @@ -90,7 +103,9 @@ public boolean next(NullWritable key, VectorizedRowBatch value) throws IOExcepti // as this does not call CreateValue for each new RecordReader it creates, this check is // required in next() if (addPartitionCols) { - rbCtx.addPartitionColsToBatch(value); + if (partitionValues != null) { + rbCtx.addPartitionColsToBatch(value, partitionValues); + } addPartitionCols = false; } reader.nextBatch(value); @@ -108,11 +123,7 @@ public NullWritable createKey() { @Override public VectorizedRowBatch createValue() { - try { - return rbCtx.createVectorizedRowBatch(); - } catch (HiveException e) { - throw new RuntimeException("Error creating a batch", e); - } + return rbCtx.createVectorizedRowBatch(columnsToIncludeTruncated); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java index ed99615..c6070c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java @@ -14,8 +14,11 @@ package org.apache.hadoop.hive.ql.io.parquet; import java.io.IOException; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory; import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; @@ -23,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; @@ -32,7 +36,6 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; - import org.apache.parquet.hadoop.ParquetInputFormat; /** @@ -52,6 +55,7 @@ private final ParquetRecordReaderWrapper internalReader; private VectorizedRowBatchCtx rbCtx; + private Object[] partitionValues; private ArrayWritable internalValues; private NullWritable internalKey; private VectorColumnAssign[] assigners; @@ -65,11 +69,11 @@ public VectorizedParquetRecordReader( split, conf, reporter); - try { - rbCtx = new VectorizedRowBatchCtx(); - rbCtx.init(conf, split); - } catch 
(Exception e) { - throw new RuntimeException(e); + rbCtx = Utilities.getVectorizedRowBatchCtx(conf); + int partitionColumnCount = rbCtx.getPartitionColumnCount(); + if (partitionColumnCount > 0) { + partitionValues = new Object[partitionColumnCount]; + rbCtx.getPartitionValues(rbCtx, conf, split, partitionValues); } } @@ -81,13 +85,9 @@ public NullWritable createKey() { @Override public VectorizedRowBatch createValue() { - VectorizedRowBatch outputBatch = null; - try { - outputBatch = rbCtx.createVectorizedRowBatch(); - internalValues = internalReader.createValue(); - } catch (HiveException e) { - throw new RuntimeException("Error creating a batch", e); - } + VectorizedRowBatch outputBatch; + outputBatch = rbCtx.createVectorizedRowBatch(); + internalValues = internalReader.createValue(); return outputBatch; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 5708cb8..40d0e34 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -109,6 +109,8 @@ import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.stats.StatsFactory; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.mapred.InputFormat; @@ -700,6 +702,11 @@ public static void setMapWork(MapWork plan, ParseContext parseCtx, Set iterPath = partDir.iterator(); Iterator iterPartnDesc = partDesc.iterator(); @@ -761,6 +768,7 @@ public static void setMapWork(MapWork plan, ParseContext parseCtx, Set topOp, MapWork plan, boolean local, @@ -770,6 +778,16 @@ public static void setTaskPlan(String path, String alias, return; } + if (topOp instanceof TableScanOperator) { + try { + Utilities.addSchemaEvolutionToTableScanOperator( + (StructObjectInspector) tt_desc.getDeserializer().getObjectInspector(), + (TableScanOperator) topOp); + } catch (Exception e) { + throw new SemanticException(e); + } + } + if (!local) { if (plan.getPathToAliases().get(path) == null) { plan.getPathToAliases().put(path, new ArrayList()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 2af6f9a..20e1ee6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -370,6 +370,7 @@ public final void setFiltered(boolean filtered) { private FetchWork convertToWork() throws HiveException { inputs.clear(); + Utilities.addSchemaEvolutionToTableScanOperator(table, scanOp); TableDesc tableDesc = Utilities.getTableDesc(table); if (!table.isPartitioned()) { inputs.add(new ReadEntity(table, parent, !table.isView() && parent == null)); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 20f9400..5d010cc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -26,6 +26,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Properties; import 
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 20f9400..5d010cc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -26,6 +26,7 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Properties;
 import java.util.Set;
 import java.util.Stack;
@@ -33,6 +34,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.commons.lang3.tuple.ImmutablePair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.*;
@@ -63,6 +65,10 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -88,6 +95,7 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
@@ -100,6 +108,7 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind;
+import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
 import org.apache.hadoop.hive.ql.udf.UDFAcos;
 import org.apache.hadoop.hive.ql.udf.UDFAsin;
@@ -149,6 +158,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
+import com.google.common.base.Joiner;
+
 public class Vectorizer implements PhysicalPlanResolver {
 
   protected static transient final Log LOG = LogFactory.getLog(Vectorizer.class);
@@ -311,17 +322,51 @@ public Vectorizer() {
     supportedAggregationUdfs.add("stddev_samp");
   }
 
+  private class VectorTaskColumnInfo {
+    List columnNames;
+    List typeInfos;
+    int partitionColumnCount;
+
+    String[] scratchTypeNameArray;
+
+    VectorTaskColumnInfo() {
+      partitionColumnCount = 0;
+    }
+
+    public void setColumnNames(List columnNames) {
+      this.columnNames = columnNames;
+    }
+    public void setTypeInfos(List typeInfos) {
+      this.typeInfos = typeInfos;
+    }
+    public void setPartitionColumnCount(int partitionColumnCount) {
+      this.partitionColumnCount = partitionColumnCount;
+    }
+    public void setScratchTypeNameArray(String[] scratchTypeNameArray) {
+      this.scratchTypeNameArray = scratchTypeNameArray;
+    }
+
+    public void transferToBaseWork(BaseWork baseWork) {
+
+      String[] columnNameArray = columnNames.toArray(new String[0]);
+      TypeInfo[] typeInfoArray = typeInfos.toArray(new TypeInfo[0]);
+
+      VectorizedRowBatchCtx vectorizedRowBatchCtx =
+          new VectorizedRowBatchCtx(
+              columnNameArray,
+              typeInfoArray,
+              partitionColumnCount,
+              scratchTypeNameArray);
+      baseWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx);
+    }
+  }
+
   class VectorizationDispatcher implements Dispatcher {
 
     private final PhysicalContext physicalContext;
 
-    private List reduceColumnNames;
-    private List reduceTypeInfos;
-
     public VectorizationDispatcher(PhysicalContext physicalContext) {
       this.physicalContext = physicalContext;
-      reduceColumnNames = null;
-      reduceTypeInfos = null;
     }
 
     @Override
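
The VectorTaskColumnInfo carrier above replaces three per-work maps with one explicit (names, types, partition-column-count, scratch-types) tuple that is frozen into a single context object on the work. A hypothetical model of that hand-off; the class shapes mimic the patch but none of these names are Hive's actual API.

    // Minimal sketch: collect per-task column metadata, then "transfer" it into one
    // immutable context that the task would serialize alongside its plan.
    import java.util.Arrays;
    import java.util.List;

    public class TaskColumnInfoSketch {
      static final class RowBatchCtx {
        final String[] rowColumnNames;
        final String[] rowColumnTypes;
        final int partitionColumnCount;
        final String[] scratchColumnTypes;

        RowBatchCtx(String[] names, String[] types, int partitionColumnCount, String[] scratch) {
          this.rowColumnNames = names;
          this.rowColumnTypes = types;
          this.partitionColumnCount = partitionColumnCount;
          this.scratchColumnTypes = scratch;
        }
      }

      static RowBatchCtx transferToBaseWork(List<String> names, List<String> types,
          int partitionColumnCount, String[] scratchTypes) {
        // Lists are frozen into arrays exactly once, at the end of planning.
        return new RowBatchCtx(
            names.toArray(new String[0]), types.toArray(new String[0]),
            partitionColumnCount, scratchTypes);
      }

      public static void main(String[] args) {
        RowBatchCtx ctx = transferToBaseWork(
            Arrays.asList("id", "ds"), Arrays.asList("int", "string"), 1, new String[] {"double"});
        System.out.println(ctx.rowColumnNames.length + " columns, "
            + ctx.partitionColumnCount + " of them partition columns");
      }
    }
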
@@ -359,9 +404,10 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs)
     }
 
     private void convertMapWork(MapWork mapWork, boolean isTez) throws SemanticException {
-      boolean ret = validateMapWork(mapWork, isTez);
+      VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo();
+      boolean ret = validateMapWork(mapWork, vectorTaskColumnInfo, isTez);
       if (ret) {
-        vectorizeMapWork(mapWork, isTez);
+        vectorizeMapWork(mapWork, vectorTaskColumnInfo, isTez);
       }
     }
 
@@ -372,40 +418,262 @@ private void addMapWorkRules(Map opRules, NodeProcessor np)
           + ReduceSinkOperator.getOperatorName()), np);
     }
 
-    private boolean validateMapWork(MapWork mapWork, boolean isTez) throws SemanticException {
-      LOG.info("Validating MapWork...");
+    private ImmutablePair verifyOnlyOneTableScanOperator(MapWork mapWork) {
 
       // Eliminate MR plans with more than one TableScanOperator.
+
       LinkedHashMap> aliasToWork = mapWork.getAliasToWork();
       if ((aliasToWork == null) || (aliasToWork.size() == 0)) {
-        return false;
+        return null;
       }
       int tableScanCount = 0;
-      for (Operator op : aliasToWork.values()) {
+      String alias = "";
+      TableScanOperator tableScanOperator = null;
+      for (Entry> entry : aliasToWork.entrySet()) {
+        Operator op = entry.getValue();
         if (op == null) {
           LOG.warn("Map work has invalid aliases to work with. Fail validation!");
-          return false;
+          return null;
         }
         if (op instanceof TableScanOperator) {
           tableScanCount++;
+          alias = entry.getKey();
+          tableScanOperator = (TableScanOperator) op;
         }
       }
       if (tableScanCount > 1) {
-        LOG.warn("Map work has more than 1 TableScanOperator aliases to work with. Fail validation!");
-        return false;
+        LOG.warn("Map work has more than 1 TableScanOperator. Fail validation!");
+        return null;
+      }
+      return new ImmutablePair(alias, tableScanOperator);
+    }
+
+    private void getTableScanOperatorSchemaInfo(TableScanOperator tableScanOperator,
+        List logicalColumnNameList, List logicalTypeInfoList) {
+
+      TableScanDesc tableScanDesc = tableScanOperator.getConf();
+
+      // Add all non-virtual columns to make a vectorization context for
+      // the TableScan operator.
+      RowSchema rowSchema = tableScanOperator.getSchema();
+      for (ColumnInfo c : rowSchema.getSignature()) {
+        // Validation will later exclude vectorization of virtual columns usage (HIVE-5560).
+        if (!isVirtualColumn(c)) {
+          String columnName = c.getInternalName();
+          String typeName = c.getTypeName();
+          TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
+
+          logicalColumnNameList.add(columnName);
+          logicalTypeInfoList.add(typeInfo);
+        }
+      }
+    }
+
+    private String getColumns(List columnNames, int start, int length,
+        Character separator) {
+      return Joiner.on(separator).join(columnNames.subList(start, start + length));
+    }
+
+    private String getTypes(List typeInfos, int start, int length) {
+      return TypeInfoUtils.getTypesString(typeInfos.subList(start, start + length));
+    }
+
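
getColumns above relies on Guava's Joiner; for a single-character separator this is equivalent to String.join over the same sub-list, as this small runnable example shows (the names and counts are invented for illustration).

    import java.util.Arrays;
    import java.util.List;

    public class JoinColumns {
      public static void main(String[] args) {
        List<String> names = Arrays.asList("id", "name", "age", "ds");
        int dataColumnCount = 3; // trailing columns are partition columns
        // Same result as Joiner.on(',').join(names.subList(0, dataColumnCount)).
        String dataColumnsString = String.join(",", names.subList(0, dataColumnCount));
        System.out.println(dataColumnsString); // id,name,age
      }
    }
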
+    private boolean verifyAndSetVectorPartDesc(PartitionDesc pd) {
+
+      // Look for Pass-Thru case where InputFileFormat has VectorizedInputFormatInterface
+      // and reads VectorizedRowBatch as a "row".
+
+      if (Utilities.isInputFileFormatVectorized(pd)) {
+
+        pd.setVectorPartitionDesc(VectorPartitionDesc.createVectorizedInputFileFormat());
+
+        return true;
+      }
+
+      LOG.info("Input format: " + pd.getInputFileFormatClassName()
+          + ", doesn't provide vectorized input");
+
+      return false;
+    }
+
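
verifyAndSetVectorPartDesc reduces to an interface test on the partition's input format. A toy model with stand-in types; note that isAssignableFrom, unlike scanning getInterfaces() as the deleted validation code below did, also accepts interfaces inherited through a superclass.

    // Reduced model of the pass-through test: a partition vectorizes directly only if
    // its input format implements the vectorized-batch interface. Types are stand-ins.
    interface VectorizedInput {}                      // stands in for VectorizedInputFormatInterface
    class OrcLikeFormat implements VectorizedInput {} // e.g. a columnar, batch-producing format
    class TextLikeFormat {}                           // e.g. a plain row-oriented format

    public class PassThruCheck {
      static boolean isVectorized(Class<?> inputFormatClass) {
        return VectorizedInput.class.isAssignableFrom(inputFormatClass);
      }

      public static void main(String[] args) {
        System.out.println(isVectorized(OrcLikeFormat.class));  // true
        System.out.println(isVectorized(TextLikeFormat.class)); // false
      }
    }
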
+    private boolean validateInputFormatAndSchemaEvolution(MapWork mapWork, String alias,
+        TableScanOperator tableScanOperator, VectorTaskColumnInfo vectorTaskColumnInfo) {
+
+      // These names/types are the data columns plus partition columns.
+      final List allColumnNameList = new ArrayList();
+      final List allTypeInfoList = new ArrayList();
+
+      getTableScanOperatorSchemaInfo(tableScanOperator, allColumnNameList, allTypeInfoList);
+      final int allColumnCount = allColumnNameList.size();
+
+      // Validate input format and schema evolution capability.
+
+      // For the table, enter a null value in the multi-key map indicating no conversion necessary
+      // if the schema matches the table.
+
+      HashMap conversionMap = new HashMap();
+
+      boolean isFirst = true;
+      int dataColumnCount = 0;
+      int partitionColumnCount = 0;
+
+      List dataColumnList = null;
+      String dataColumnsString = "";
+      List dataTypeInfoList = null;
+
       // Validate the input format
-      for (String path : mapWork.getPathToPartitionInfo().keySet()) {
-        PartitionDesc pd = mapWork.getPathToPartitionInfo().get(path);
-        List> interfaceList =
-            Arrays.asList(pd.getInputFileFormatClass().getInterfaces());
-        if (!interfaceList.contains(VectorizedInputFormatInterface.class)) {
-          LOG.info("Input format: " + pd.getInputFileFormatClassName()
-              + ", doesn't provide vectorized input");
+      VectorPartitionConversion partitionConversion = new VectorPartitionConversion();
+      LinkedHashMap> pathToAliases = mapWork.getPathToAliases();
+      LinkedHashMap pathToPartitionInfo = mapWork.getPathToPartitionInfo();
+      for (Entry> entry: pathToAliases.entrySet()) {
+        String path = entry.getKey();
+        List aliases = entry.getValue();
+        boolean isPresent = (aliases != null && aliases.indexOf(alias) != -1);
+        if (!isPresent) {
+          LOG.info("Alias " + alias + " not present in aliases " + aliases);
+          return false;
+        }
+        PartitionDesc partDesc = pathToPartitionInfo.get(path);
+        if (partDesc.getVectorPartitionDesc() != null) {
+          // We have seen this already.
+          continue;
+        }
+        if (!verifyAndSetVectorPartDesc(partDesc)) {
           return false;
         }
+        VectorPartitionDesc vectorPartDesc = partDesc.getVectorPartitionDesc();
+        LOG.info("Vectorizer path: " + path + ", read type " +
+            vectorPartDesc.getVectorMapOperatorReadType().name() + ", aliases " + aliases);
+
+        Properties partProps = partDesc.getProperties();
+
+        String nextDataColumnsString =
+            partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
+        String[] nextDataColumns = nextDataColumnsString.split(",");
+
+        String nextDataTypesString =
+            partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
+
+        // We convert to an array of TypeInfo using a library routine since it parses the information
+        // and can handle use of different separators, etc.  We cannot use the raw type string
+        // for comparison in the map because of the different separators used.
+        List nextDataTypeInfoList =
+            TypeInfoUtils.getTypeInfosFromTypeString(nextDataTypesString);
+
+        if (isFirst) {
+
+          // We establish with the first one whether the table is partitioned or not.
+
+          LinkedHashMap partSpec = partDesc.getPartSpec();
+          if (partSpec != null && partSpec.size() > 0) {
+            partitionColumnCount = partSpec.size();
+            dataColumnCount = allColumnCount - partitionColumnCount;
+          } else {
+            partitionColumnCount = 0;
+            dataColumnCount = allColumnCount;
+          }
+
+          dataColumnList = allColumnNameList.subList(0, dataColumnCount);
+          dataColumnsString = getColumns(allColumnNameList, 0, dataColumnCount, ',');
+          dataTypeInfoList = allTypeInfoList.subList(0, dataColumnCount);
+
+          // Add the table (non-partitioned) columns and types into the map as not needing
+          // conversion (i.e. null).
+          conversionMap.put(
+              new ImmutablePair(dataColumnsString, dataTypeInfoList), null);
+
+          isFirst = false;
+        }
+
+        ImmutablePair columnNamesAndTypesCombination =
+            new ImmutablePair(nextDataColumnsString, nextDataTypeInfoList);
+
+        boolean[] conversionFlags;
+        if (conversionMap.containsKey(columnNamesAndTypesCombination)) {
+
+          conversionFlags = conversionMap.get(columnNamesAndTypesCombination);
+
+        } else {
+
+          List nextDataColumnList = Arrays.asList(nextDataColumns);
+
+          // Validate the column names that are present are the same.  Missing columns will be
+          // implicitly defaulted to null.
+
+          if (nextDataColumnList.size() > dataColumnList.size()) {
+            LOG.info(
+                String.format(
+                    "Could not vectorize partition %s. The number of partition data columns %d is greater than the number of table columns %d",
+                    path, nextDataColumnList.size(), dataColumnList.size()));
+            return false;
+          }
+          for (int i = 0; i < nextDataColumnList.size(); i++) {
+            String nextColumnName = nextDataColumnList.get(i);
+            String tableColumnName = dataColumnList.get(i);
+            if (!nextColumnName.equals(tableColumnName)) {
+              LOG.info(
+                  String.format(
+                      "Could not vectorize partition %s. The partition column name %s does not match the table column name %s",
+                      path, nextColumnName, tableColumnName));
+              return false;
+            }
+          }
+
+          // The table column types might have been changed with ALTER.  There are restrictions
+          // here for vectorization.
+
+          // Some readers / deserializers take responsibility for conversion themselves.
+
+          // If we need to check for conversion, the conversion object may come back null
+          // indicating from a vectorization point of view the conversion is implicit.  That is,
+          // all implicit integer upgrades.
+
+          if (vectorPartDesc.getNeedsDataTypeConversionCheck() &&
+              !nextDataTypeInfoList.equals(dataTypeInfoList)) {
+
+            // The results will be in 2 members: validConversion and conversionFlags
+            partitionConversion.validateConversion(nextDataTypeInfoList, dataTypeInfoList);
+            if (!partitionConversion.getValidConversion()) {
+              return false;
+            }
+            conversionFlags = partitionConversion.getResultConversionFlags();
+          } else {
+            conversionFlags = null;
+          }
+
+          // We enter this in our map so we don't have to check again for subsequent partitions.
+
+          conversionMap.put(columnNamesAndTypesCombination, conversionFlags);
+        }
+
+        vectorPartDesc.setConversionFlags(conversionFlags);
+
+        vectorPartDesc.setTypeInfos(nextDataTypeInfoList);
+      }
+
+      vectorTaskColumnInfo.setColumnNames(allColumnNameList);
+      vectorTaskColumnInfo.setTypeInfos(allTypeInfoList);
+      vectorTaskColumnInfo.setPartitionColumnCount(partitionColumnCount);
+
+      return true;
+    }
+
+    private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez)
+        throws SemanticException {
+
+      LOG.info("Validating MapWork...");
+
+      ImmutablePair pair = verifyOnlyOneTableScanOperator(mapWork);
+      if (pair == null) {
+        return false;
+      }
+      String alias = pair.left;
+      TableScanOperator tableScanOperator = pair.right;
+
+      // This call fills in the column names, types, and partition column count in
+      // vectorTaskColumnInfo.
+      if (!validateInputFormatAndSchemaEvolution(mapWork, alias, tableScanOperator, vectorTaskColumnInfo)) {
+        return false;
       }
+
       Map opRules = new LinkedHashMap();
       MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTez);
       addMapWorkRules(opRules, vnp);
@@ -427,11 +695,14 @@ private boolean validateMapWork(MapWork mapWork, boolean isTez) throws SemanticE
       return true;
     }
 
-    private void vectorizeMapWork(MapWork mapWork, boolean isTez) throws SemanticException {
+    private void vectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo,
+        boolean isTez) throws SemanticException {
+
       LOG.info("Vectorizing MapWork...");
       mapWork.setVectorMode(true);
       Map opRules = new LinkedHashMap();
-      MapWorkVectorizationNodeProcessor vnp = new MapWorkVectorizationNodeProcessor(mapWork, isTez);
+      MapWorkVectorizationNodeProcessor vnp =
+          new MapWorkVectorizationNodeProcessor(mapWork, isTez, vectorTaskColumnInfo);
       addMapWorkRules(opRules, vnp);
       Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
       GraphWalker ogw = new PreOrderWalker(disp);
@@ -441,9 +712,9 @@ private void vectorizeMapWork(MapWork mapWork, boolean isTez) throws SemanticExc
       HashMap nodeOutput = new HashMap();
       ogw.startWalking(topNodes, nodeOutput);
 
-      mapWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap());
-      mapWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap());
-      mapWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap());
+      vectorTaskColumnInfo.setScratchTypeNameArray(vnp.getVectorScratchColumnTypeNames());
+
+      vectorTaskColumnInfo.transferToBaseWork(mapWork);
 
       if (LOG.isDebugEnabled()) {
         debugDisplayAllMaps(mapWork);
@@ -453,13 +724,19 @@ private void vectorizeMapWork(MapWork mapWork, boolean isTez) throws SemanticExc
     }
 
     private void convertReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException {
-      boolean ret = validateReduceWork(reduceWork);
+      VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo();
+      boolean ret = validateReduceWork(reduceWork, vectorTaskColumnInfo, isTez);
       if (ret) {
-        vectorizeReduceWork(reduceWork, isTez);
+        vectorizeReduceWork(reduceWork, vectorTaskColumnInfo, isTez);
       }
     }
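
The validation loop above admits a partition whose columns are a name-for-name prefix of the table's columns (missing trailing columns read as null) and memoizes each distinct schema so later partitions with the same layout are not re-checked. A compact, hypothetical model of just that check, with types omitted.

    import java.util.*;

    public class PartitionSchemaCheck {
      static final Map<List<String>, Boolean> memo = new HashMap<>();

      static boolean compatible(List<String> table, List<String> partition) {
        return memo.computeIfAbsent(partition, p -> {
          if (p.size() > table.size()) return false;          // more columns than the table
          for (int i = 0; i < p.size(); i++) {
            if (!p.get(i).equals(table.get(i))) return false; // names must line up positionally
          }
          return true;                                        // trailing table columns default to null
        });
      }

      public static void main(String[] args) {
        List<String> table = Arrays.asList("id", "name", "age");
        System.out.println(compatible(table, Arrays.asList("id", "name")));    // true
        System.out.println(compatible(table, Arrays.asList("id", "renamed"))); // false
      }
    }
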
-    private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws SemanticException {
+    private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork,
+        VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException {
+
+      ArrayList reduceColumnNames = new ArrayList();
+      ArrayList reduceTypeInfos = new ArrayList();
+
       try {
         // Check key ObjectInspector.
         ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector();
@@ -483,9 +760,6 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws Sema
       StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector;
       List valueFields = valueStructObjectInspector.getAllStructFieldRefs();
 
-      reduceColumnNames = new ArrayList();
-      reduceTypeInfos = new ArrayList();
-
       for (StructField field: keyFields) {
         reduceColumnNames.add(Utilities.ReduceField.KEY.toString() + "." + field.getFieldName());
         reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName()));
@@ -497,6 +771,10 @@ private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws Sema
       } catch (Exception e) {
         throw new SemanticException(e);
       }
+
+      vectorTaskColumnInfo.setColumnNames(reduceColumnNames);
+      vectorTaskColumnInfo.setTypeInfos(reduceTypeInfos);
+
       return true;
     }
 
@@ -505,11 +783,13 @@ private void addReduceWorkRules(Map opRules, NodeProcessor
       opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + ".*"), np);
     }
 
-    private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticException {
+    private boolean validateReduceWork(ReduceWork reduceWork,
+        VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) throws SemanticException {
+
       LOG.info("Validating ReduceWork...");
 
       // Validate input to ReduceWork.
-      if (!getOnlyStructObjectInspectors(reduceWork)) {
+      if (!getOnlyStructObjectInspectors(reduceWork, vectorTaskColumnInfo)) {
         return false;
       }
       // Now check the reduce operator tree.
@@ -533,7 +813,9 @@ private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticExcepti
       return true;
     }
 
-    private void vectorizeReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException {
+    private void vectorizeReduceWork(ReduceWork reduceWork,
+        VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) throws SemanticException {
+
       LOG.info("Vectorizing ReduceWork...");
       reduceWork.setVectorMode(true);
 
@@ -542,7 +824,7 @@ private void vectorizeReduceWork(ReduceWork reduceWork, boolean isTez) throws Se
       // VectorizationContext...  Do we use PreOrderWalker instead of DefaultGraphWalker.
       Map opRules = new LinkedHashMap();
       ReduceWorkVectorizationNodeProcessor vnp =
-          new ReduceWorkVectorizationNodeProcessor(reduceColumnNames, reduceTypeInfos, isTez);
+          new ReduceWorkVectorizationNodeProcessor(vectorTaskColumnInfo, isTez);
       addReduceWorkRules(opRules, vnp);
       Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null);
       GraphWalker ogw = new PreOrderWalker(disp);
@@ -557,9 +839,9 @@ private void vectorizeReduceWork(ReduceWork reduceWork, boolean isTez) throws Se
       // Necessary since we are vectorizing the root operator in reduce.
       reduceWork.setReducer(vnp.getRootVectorOp());
 
-      reduceWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap());
-      reduceWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap());
-      reduceWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap());
+      vectorTaskColumnInfo.setScratchTypeNameArray(vnp.getVectorScratchColumnTypeNames());
+
+      vectorTaskColumnInfo.transferToBaseWork(reduceWork);
 
       if (LOG.isDebugEnabled()) {
         debugDisplayAllMaps(reduceWork);
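
getOnlyStructObjectInspectors flattens the shuffle key and value structs into one column list, prefixing each field with its side of the shuffle. A small runnable sketch of that naming scheme; the field names are invented, and Utilities.ReduceField.KEY/VALUE render as "KEY"/"VALUE".

    import java.util.*;

    public class ReduceColumnNames {
      public static void main(String[] args) {
        List<String> keyFields = Arrays.asList("userid");
        List<String> valueFields = Arrays.asList("cnt", "total");

        List<String> reduceColumnNames = new ArrayList<>();
        for (String f : keyFields)   reduceColumnNames.add("KEY." + f);
        for (String f : valueFields) reduceColumnNames.add("VALUE." + f);

        System.out.println(reduceColumnNames); // [KEY.userid, VALUE.cnt, VALUE.total]
      }
    }
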
@@ -627,23 +909,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
 
     // The vectorization context for the Map or Reduce task.
     protected VectorizationContext taskVectorizationContext;
 
-    // The input projection column type name map for the Map or Reduce task.
-    protected Map taskColumnTypeNameMap;
 
-    VectorizationNodeProcessor() {
-      taskColumnTypeNameMap = new HashMap();
-    }
-
-    public Map getVectorColumnNameMap() {
-      return taskVectorizationContext.getProjectionColumnMap();
     }
 
-    public Map getVectorColumnTypeMap() {
-      return taskColumnTypeNameMap;
-    }
-
-    public Map getVectorScratchColumnTypeMap() {
-      return taskVectorizationContext.getScratchColumnTypeMap();
+    public String[] getVectorScratchColumnTypeNames() {
+      return taskVectorizationContext.getScratchColumnTypeNames();
     }
 
     protected final Set> opsDone =
@@ -713,11 +983,14 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
   class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
 
     private final MapWork mWork;
+    private VectorTaskColumnInfo vectorTaskColumnInfo;
     private final boolean isTez;
 
-    public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez) {
+    public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez,
+        VectorTaskColumnInfo vectorTaskColumnInfo) {
       super();
       this.mWork = mWork;
+      this.vectorTaskColumnInfo = vectorTaskColumnInfo;
       this.isTez = isTez;
     }
 
@@ -731,8 +1004,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
 
       if (op instanceof TableScanOperator) {
         if (taskVectorizationContext == null) {
-          taskVectorizationContext = getVectorizationContext(op.getSchema(), op.getName(),
-              taskColumnTypeNameMap);
+          taskVectorizationContext = getVectorizationContext(op.getName(), vectorTaskColumnInfo);
         }
         vContext = taskVectorizationContext;
       } else {
@@ -773,8 +1045,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
 
   class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
 
-    private final List reduceColumnNames;
-    private final List reduceTypeInfos;
+    private VectorTaskColumnInfo vectorTaskColumnInfo;
 
     private boolean isTez;
 
@@ -784,11 +1055,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       return rootVectorOp;
     }
 
-    public ReduceWorkVectorizationNodeProcessor(List reduceColumnNames,
-        List reduceTypeInfos, boolean isTez) {
+    public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo,
+        boolean isTez) {
+
       super();
-      this.reduceColumnNames = reduceColumnNames;
-      this.reduceTypeInfos = reduceTypeInfos;
+      this.vectorTaskColumnInfo = vectorTaskColumnInfo;
       rootVectorOp = null;
       this.isTez = isTez;
     }
@@ -804,15 +1075,11 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx,
       boolean saveRootVectorOp = false;
 
       if (op.getParentOperators().size() == 0) {
-        LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + reduceColumnNames.toString());
+        LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + vectorTaskColumnInfo.columnNames.toString());
 
-        vContext = new VectorizationContext("__Reduce_Shuffle__", reduceColumnNames);
+        vContext = new VectorizationContext("__Reduce_Shuffle__", vectorTaskColumnInfo.columnNames);
         taskVectorizationContext = vContext;
-        int i = 0;
-        for (TypeInfo typeInfo : reduceTypeInfos) {
-          taskColumnTypeNameMap.put(i, typeInfo.getTypeName());
-          i++;
-        }
+
         saveRootVectorOp = true;
 
         if (LOG.isDebugEnabled()) {
@@ -881,7 +1148,7 @@ protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) {
 
   @Override
   public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticException {
-    this.physicalContext = physicalContext;
+
     hiveConf = physicalContext.getConf();
 
     boolean vectorPath = HiveConf.getBoolVar(hiveConf,
@@ -1022,65 +1289,6 @@ private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) {
       return false;
     }
 
-    String columns = "";
-    String types = "";
-    String partitionColumns = "";
-    String partitionTypes = "";
-    boolean haveInfo = false;
-
-    // This over-reaches slightly, since we can have > 1 table-scan per map-work.
-    // It needs path to partition, path to alias, then check the alias == the same table-scan, to be accurate.
-    // That said, that is a TODO item to be fixed when we support >1 TableScans per vectorized pipeline later.
-    LinkedHashMap partitionDescs = mWork.getPathToPartitionInfo();
-
-    // For vectorization, compare each partition information for against the others.
-    // We assume the table information will be from one of the partitions, so it will
-    // work to focus on the partition information and not compare against the TableScanOperator
-    // columns (in the VectorizationContext)....
-    for (Map.Entry entry : partitionDescs.entrySet()) {
-      PartitionDesc partDesc = entry.getValue();
-      if (partDesc.getPartSpec() == null || partDesc.getPartSpec().isEmpty()) {
-        // No partition information -- we match because we would default to using the table description.
-        continue;
-      }
-      Properties partProps = partDesc.getProperties();
-      if (!haveInfo) {
-        columns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
-        types = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
-        partitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
-        partitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
-        haveInfo = true;
-      } else {
-        String nextColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
-        String nextTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
-        String nextPartitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
-        String nextPartitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
-        if (!columns.equalsIgnoreCase(nextColumns)) {
-          LOG.info(
-              String.format("Could not vectorize partition %s. Its column names %s do not match the other column names %s",
-                  entry.getKey(), nextColumns, columns));
-          return false;
-        }
-        if (!types.equalsIgnoreCase(nextTypes)) {
-          LOG.info(
-              String.format("Could not vectorize partition %s. Its column types %s do not match the other column types %s",
-                  entry.getKey(), nextTypes, types));
-          return false;
-        }
-        if (!partitionColumns.equalsIgnoreCase(nextPartitionColumns)) {
-          LOG.info(
-              String.format("Could not vectorize partition %s. Its partition column names %s do not match the other partition column names %s",
-                  entry.getKey(), nextPartitionColumns, partitionColumns));
-          return false;
-        }
-        if (!partitionTypes.equalsIgnoreCase(nextPartitionTypes)) {
-          LOG.info(
-              String.format("Could not vectorize partition %s. Its partition column types %s do not match the other partition column types %s",
-                  entry.getKey(), nextPartitionTypes, partitionTypes));
-          return false;
-        }
-      }
-    }
 
     return true;
   }
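
Where the deleted block above demanded byte-identical schema strings across partitions, the new VectorPartitionConversion path instead asks whether each changed column type is safely convertible, e.g. implicit integer widening. A toy version of such a test; the real class's rules are broader, and this only models the integer family.

    import java.util.Arrays;
    import java.util.List;

    public class WideningCheck {
      static final List<String> WIDENING_ORDER =
          Arrays.asList("tinyint", "smallint", "int", "bigint");

      static boolean implicitlyConvertible(String from, String to) {
        int i = WIDENING_ORDER.indexOf(from), j = WIDENING_ORDER.indexOf(to);
        return i >= 0 && j >= 0 && i <= j; // only widen, never narrow
      }

      public static void main(String[] args) {
        System.out.println(implicitlyConvertible("smallint", "bigint")); // true
        System.out.println(implicitlyConvertible("bigint", "int"));      // false
      }
    }
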
@@ -1412,23 +1620,10 @@ private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mo
     return result;
   }
 
-  private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName,
-      Map typeNameMap) {
+  private VectorizationContext getVectorizationContext(String contextName,
+      VectorTaskColumnInfo vectorTaskColumnInfo) {
 
-    VectorizationContext vContext = new VectorizationContext(contextName);
-
-    // Add all non-virtual columns to make a vectorization context for
-    // the TableScan operator.
-    int i = 0;
-    for (ColumnInfo c : rowSchema.getSignature()) {
-      // Earlier, validation code should have eliminated virtual columns usage (HIVE-5560).
-      if (!isVirtualColumn(c)) {
-        vContext.addInitialColumn(c.getInternalName());
-        typeNameMap.put(i, c.getTypeName());
-        i++;
-      }
-    }
-    vContext.finishedAddingInitialColumns();
+    VectorizationContext vContext = new VectorizationContext(contextName, vectorTaskColumnInfo.columnNames);
 
     return vContext;
   }
@@ -1785,12 +1980,16 @@ private boolean isVirtualColumn(ColumnInfo column) {
 
   public void debugDisplayAllMaps(BaseWork work) {
 
-    Map columnNameMap = work.getVectorColumnNameMap();
-    Map columnTypeMap = work.getVectorColumnTypeMap();
-    Map scratchColumnTypeMap = work.getVectorScratchColumnTypeMap();
+    VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx();
+
+    String[] columnNames = vectorizedRowBatchCtx.getRowColumnNames();
+    Object columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
+    int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount();
+    String[] scratchColumnTypeNames = vectorizedRowBatchCtx.getScratchColumnTypeNames();
 
-    LOG.debug("debugDisplayAllMaps columnNameMap " + columnNameMap.toString());
-    LOG.debug("debugDisplayAllMaps columnTypeMap " + columnTypeMap.toString());
-    LOG.debug("debugDisplayAllMaps scratchColumnTypeMap " + scratchColumnTypeMap.toString());
+    LOG.debug("debugDisplayAllMaps columnNames " + Arrays.toString(columnNames));
+    LOG.debug("debugDisplayAllMaps columnTypeInfos " + Arrays.deepToString((Object[]) columnTypeInfos));
+    LOG.debug("debugDisplayAllMaps partitionColumnCount " + partitionColumnCount);
+    LOG.debug("debugDisplayAllMaps scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames));
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java.orig
deleted file mode
index 0d4c1d8..0000000
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.optimizer.physical; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.Stack; -import java.util.regex.Pattern; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.ql.exec.*; -import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; -import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; -import org.apache.hadoop.hive.ql.exec.spark.SparkTask; -import org.apache.hadoop.hive.ql.exec.tez.TezTask; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyLongOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyMultiKeyOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerBigOnlyStringOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerLongOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerMultiKeyOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinInnerStringOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiLongOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiMultiKeyOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLeftSemiStringOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; -import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker; -import org.apache.hadoop.hive.ql.lib.PreOrderWalker; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; -import org.apache.hadoop.hive.ql.lib.TaskGraphWalker; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc; -import 
org.apache.hadoop.hive.ql.plan.AggregationDesc; -import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; -import org.apache.hadoop.hive.ql.plan.JoinDesc; -import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.hive.ql.plan.ReduceWork; -import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; -import org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc; -import org.apache.hadoop.hive.ql.plan.SparkWork; -import org.apache.hadoop.hive.ql.plan.TableScanDesc; -import org.apache.hadoop.hive.ql.plan.TezWork; -import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; -import org.apache.hadoop.hive.ql.plan.api.OperatorType; -import org.apache.hadoop.hive.ql.udf.UDFAcos; -import org.apache.hadoop.hive.ql.udf.UDFAsin; -import org.apache.hadoop.hive.ql.udf.UDFAtan; -import org.apache.hadoop.hive.ql.udf.UDFBin; -import org.apache.hadoop.hive.ql.udf.UDFConv; -import org.apache.hadoop.hive.ql.udf.UDFCos; -import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; -import org.apache.hadoop.hive.ql.udf.UDFDegrees; -import org.apache.hadoop.hive.ql.udf.UDFExp; -import org.apache.hadoop.hive.ql.udf.UDFHex; -import org.apache.hadoop.hive.ql.udf.UDFHour; -import org.apache.hadoop.hive.ql.udf.UDFLength; -import org.apache.hadoop.hive.ql.udf.UDFLike; -import org.apache.hadoop.hive.ql.udf.UDFLn; -import org.apache.hadoop.hive.ql.udf.UDFLog; -import org.apache.hadoop.hive.ql.udf.UDFLog10; -import org.apache.hadoop.hive.ql.udf.UDFLog2; -import org.apache.hadoop.hive.ql.udf.UDFMinute; -import org.apache.hadoop.hive.ql.udf.UDFMonth; -import org.apache.hadoop.hive.ql.udf.UDFRadians; -import org.apache.hadoop.hive.ql.udf.UDFRand; -import org.apache.hadoop.hive.ql.udf.UDFSecond; -import org.apache.hadoop.hive.ql.udf.UDFSign; -import org.apache.hadoop.hive.ql.udf.UDFSin; -import org.apache.hadoop.hive.ql.udf.UDFSqrt; -import org.apache.hadoop.hive.ql.udf.UDFSubstr; -import org.apache.hadoop.hive.ql.udf.UDFTan; -import org.apache.hadoop.hive.ql.udf.UDFToBoolean; -import org.apache.hadoop.hive.ql.udf.UDFToByte; -import org.apache.hadoop.hive.ql.udf.UDFToDouble; -import org.apache.hadoop.hive.ql.udf.UDFToFloat; -import org.apache.hadoop.hive.ql.udf.UDFToInteger; -import org.apache.hadoop.hive.ql.udf.UDFToLong; -import org.apache.hadoop.hive.ql.udf.UDFToShort; -import org.apache.hadoop.hive.ql.udf.UDFToString; -import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; -import org.apache.hadoop.hive.ql.udf.UDFYear; -import org.apache.hadoop.hive.ql.udf.generic.*; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; - -public class Vectorizer 
implements PhysicalPlanResolver { - - protected static transient final Log LOG = LogFactory.getLog(Vectorizer.class); - - Pattern supportedDataTypesPattern; - List> vectorizableTasks = - new ArrayList>(); - Set> supportedGenericUDFs = new HashSet>(); - - Set supportedAggregationUdfs = new HashSet(); - - private HiveConf hiveConf; - - public Vectorizer() { - - StringBuilder patternBuilder = new StringBuilder(); - patternBuilder.append("int"); - patternBuilder.append("|smallint"); - patternBuilder.append("|tinyint"); - patternBuilder.append("|bigint"); - patternBuilder.append("|integer"); - patternBuilder.append("|long"); - patternBuilder.append("|short"); - patternBuilder.append("|timestamp"); - patternBuilder.append("|" + serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME); - patternBuilder.append("|" + serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME); - patternBuilder.append("|boolean"); - patternBuilder.append("|binary"); - patternBuilder.append("|string"); - patternBuilder.append("|byte"); - patternBuilder.append("|float"); - patternBuilder.append("|double"); - patternBuilder.append("|date"); - patternBuilder.append("|void"); - - // Decimal types can be specified with different precision and scales e.g. decimal(10,5), - // as opposed to other data types which can be represented by constant strings. - // The regex matches only the "decimal" prefix of the type. - patternBuilder.append("|decimal.*"); - - // CHAR and VARCHAR types can be specified with maximum length. - patternBuilder.append("|char.*"); - patternBuilder.append("|varchar.*"); - - supportedDataTypesPattern = Pattern.compile(patternBuilder.toString()); - - supportedGenericUDFs.add(GenericUDFOPPlus.class); - supportedGenericUDFs.add(GenericUDFOPMinus.class); - supportedGenericUDFs.add(GenericUDFOPMultiply.class); - supportedGenericUDFs.add(GenericUDFOPDivide.class); - supportedGenericUDFs.add(GenericUDFOPMod.class); - supportedGenericUDFs.add(GenericUDFOPNegative.class); - supportedGenericUDFs.add(GenericUDFOPPositive.class); - - supportedGenericUDFs.add(GenericUDFOPEqualOrLessThan.class); - supportedGenericUDFs.add(GenericUDFOPEqualOrGreaterThan.class); - supportedGenericUDFs.add(GenericUDFOPGreaterThan.class); - supportedGenericUDFs.add(GenericUDFOPLessThan.class); - supportedGenericUDFs.add(GenericUDFOPNot.class); - supportedGenericUDFs.add(GenericUDFOPNotEqual.class); - supportedGenericUDFs.add(GenericUDFOPNotNull.class); - supportedGenericUDFs.add(GenericUDFOPNull.class); - supportedGenericUDFs.add(GenericUDFOPOr.class); - supportedGenericUDFs.add(GenericUDFOPAnd.class); - supportedGenericUDFs.add(GenericUDFOPEqual.class); - supportedGenericUDFs.add(UDFLength.class); - - supportedGenericUDFs.add(UDFYear.class); - supportedGenericUDFs.add(UDFMonth.class); - supportedGenericUDFs.add(UDFDayOfMonth.class); - supportedGenericUDFs.add(UDFHour.class); - supportedGenericUDFs.add(UDFMinute.class); - supportedGenericUDFs.add(UDFSecond.class); - supportedGenericUDFs.add(UDFWeekOfYear.class); - supportedGenericUDFs.add(GenericUDFToUnixTimeStamp.class); - - supportedGenericUDFs.add(GenericUDFDateAdd.class); - supportedGenericUDFs.add(GenericUDFDateSub.class); - supportedGenericUDFs.add(GenericUDFDate.class); - supportedGenericUDFs.add(GenericUDFDateDiff.class); - - supportedGenericUDFs.add(UDFLike.class); - supportedGenericUDFs.add(GenericUDFRegExp.class); - supportedGenericUDFs.add(UDFSubstr.class); - supportedGenericUDFs.add(GenericUDFLTrim.class); - supportedGenericUDFs.add(GenericUDFRTrim.class); - 
supportedGenericUDFs.add(GenericUDFTrim.class); - - supportedGenericUDFs.add(UDFSin.class); - supportedGenericUDFs.add(UDFCos.class); - supportedGenericUDFs.add(UDFTan.class); - supportedGenericUDFs.add(UDFAsin.class); - supportedGenericUDFs.add(UDFAcos.class); - supportedGenericUDFs.add(UDFAtan.class); - supportedGenericUDFs.add(UDFDegrees.class); - supportedGenericUDFs.add(UDFRadians.class); - supportedGenericUDFs.add(GenericUDFFloor.class); - supportedGenericUDFs.add(GenericUDFCeil.class); - supportedGenericUDFs.add(UDFExp.class); - supportedGenericUDFs.add(UDFLn.class); - supportedGenericUDFs.add(UDFLog2.class); - supportedGenericUDFs.add(UDFLog10.class); - supportedGenericUDFs.add(UDFLog.class); - supportedGenericUDFs.add(GenericUDFPower.class); - supportedGenericUDFs.add(GenericUDFRound.class); - supportedGenericUDFs.add(GenericUDFBRound.class); - supportedGenericUDFs.add(GenericUDFPosMod.class); - supportedGenericUDFs.add(UDFSqrt.class); - supportedGenericUDFs.add(UDFSign.class); - supportedGenericUDFs.add(UDFRand.class); - supportedGenericUDFs.add(UDFBin.class); - supportedGenericUDFs.add(UDFHex.class); - supportedGenericUDFs.add(UDFConv.class); - - supportedGenericUDFs.add(GenericUDFLower.class); - supportedGenericUDFs.add(GenericUDFUpper.class); - supportedGenericUDFs.add(GenericUDFConcat.class); - supportedGenericUDFs.add(GenericUDFAbs.class); - supportedGenericUDFs.add(GenericUDFBetween.class); - supportedGenericUDFs.add(GenericUDFIn.class); - supportedGenericUDFs.add(GenericUDFCase.class); - supportedGenericUDFs.add(GenericUDFWhen.class); - supportedGenericUDFs.add(GenericUDFCoalesce.class); - supportedGenericUDFs.add(GenericUDFElt.class); - supportedGenericUDFs.add(GenericUDFInitCap.class); - - // For type casts - supportedGenericUDFs.add(UDFToLong.class); - supportedGenericUDFs.add(UDFToInteger.class); - supportedGenericUDFs.add(UDFToShort.class); - supportedGenericUDFs.add(UDFToByte.class); - supportedGenericUDFs.add(UDFToBoolean.class); - supportedGenericUDFs.add(UDFToFloat.class); - supportedGenericUDFs.add(UDFToDouble.class); - supportedGenericUDFs.add(UDFToString.class); - supportedGenericUDFs.add(GenericUDFTimestamp.class); - supportedGenericUDFs.add(GenericUDFToDecimal.class); - supportedGenericUDFs.add(GenericUDFToDate.class); - supportedGenericUDFs.add(GenericUDFToChar.class); - supportedGenericUDFs.add(GenericUDFToVarchar.class); - supportedGenericUDFs.add(GenericUDFToIntervalYearMonth.class); - supportedGenericUDFs.add(GenericUDFToIntervalDayTime.class); - - // For conditional expressions - supportedGenericUDFs.add(GenericUDFIf.class); - - supportedAggregationUdfs.add("min"); - supportedAggregationUdfs.add("max"); - supportedAggregationUdfs.add("count"); - supportedAggregationUdfs.add("sum"); - supportedAggregationUdfs.add("avg"); - supportedAggregationUdfs.add("variance"); - supportedAggregationUdfs.add("var_pop"); - supportedAggregationUdfs.add("var_samp"); - supportedAggregationUdfs.add("std"); - supportedAggregationUdfs.add("stddev"); - supportedAggregationUdfs.add("stddev_pop"); - supportedAggregationUdfs.add("stddev_samp"); - } - - class VectorizationDispatcher implements Dispatcher { - - private List reduceColumnNames; - private List reduceTypeInfos; - - public VectorizationDispatcher(PhysicalContext physicalContext) { - reduceColumnNames = null; - reduceTypeInfos = null; - } - - @Override - public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) - throws SemanticException { - Task currTask = (Task) nd; - if (currTask instanceof MapRedTask) { - convertMapWork(((MapRedTask) currTask).getWork().getMapWork(), false); - } else if (currTask instanceof TezTask) { - TezWork work = ((TezTask) currTask).getWork(); - for (BaseWork w: work.getAllWork()) { - if (w instanceof MapWork) { - convertMapWork((MapWork) w, true); - } else if (w instanceof ReduceWork) { - // We are only vectorizing Reduce under Tez. - if (HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { - convertReduceWork((ReduceWork) w, true); - } - } - } - } else if (currTask instanceof SparkTask) { - SparkWork sparkWork = (SparkWork) currTask.getWork(); - for (BaseWork baseWork : sparkWork.getAllWork()) { - if (baseWork instanceof MapWork) { - convertMapWork((MapWork) baseWork, false); - } else if (baseWork instanceof ReduceWork - && HiveConf.getBoolVar(hiveConf, - HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED)) { - convertReduceWork((ReduceWork) baseWork, false); - } - } - } - return null; - } - - private void convertMapWork(MapWork mapWork, boolean isTez) throws SemanticException { - boolean ret = validateMapWork(mapWork, isTez); - if (ret) { - vectorizeMapWork(mapWork, isTez); - } - } - - private void addMapWorkRules(Map opRules, NodeProcessor np) { - opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*" - + FileSinkOperator.getOperatorName()), np); - opRules.put(new RuleRegExp("R2", TableScanOperator.getOperatorName() + ".*" - + ReduceSinkOperator.getOperatorName()), np); - } - - private boolean validateMapWork(MapWork mapWork, boolean isTez) throws SemanticException { - LOG.info("Validating MapWork..."); - - // Eliminate MR plans with more than one TableScanOperator. - LinkedHashMap> aliasToWork = mapWork.getAliasToWork(); - if ((aliasToWork == null) || (aliasToWork.size() == 0)) { - return false; - } - int tableScanCount = 0; - for (Operator op : aliasToWork.values()) { - if (op == null) { - LOG.warn("Map work has invalid aliases to work with. Fail validation!"); - return false; - } - if (op instanceof TableScanOperator) { - tableScanCount++; - } - } - if (tableScanCount > 1) { - LOG.warn("Map work has more than 1 TableScanOperator aliases to work with. 
Fail validation!"); - return false; - } - - // Validate the input format - for (String path : mapWork.getPathToPartitionInfo().keySet()) { - PartitionDesc pd = mapWork.getPathToPartitionInfo().get(path); - List> interfaceList = - Arrays.asList(pd.getInputFileFormatClass().getInterfaces()); - if (!interfaceList.contains(VectorizedInputFormatInterface.class)) { - LOG.info("Input format: " + pd.getInputFileFormatClassName() - + ", doesn't provide vectorized input"); - return false; - } - } - Map opRules = new LinkedHashMap(); - MapWorkValidationNodeProcessor vnp = new MapWorkValidationNodeProcessor(mapWork, isTez); - addMapWorkRules(opRules, vnp); - Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); - GraphWalker ogw = new DefaultGraphWalker(disp); - - // iterator the mapper operator tree - ArrayList topNodes = new ArrayList(); - topNodes.addAll(mapWork.getAliasToWork().values()); - HashMap nodeOutput = new HashMap(); - ogw.startWalking(topNodes, nodeOutput); - for (Node n : nodeOutput.keySet()) { - if (nodeOutput.get(n) != null) { - if (!((Boolean)nodeOutput.get(n)).booleanValue()) { - return false; - } - } - } - return true; - } - - private void vectorizeMapWork(MapWork mapWork, boolean isTez) throws SemanticException { - LOG.info("Vectorizing MapWork..."); - mapWork.setVectorMode(true); - Map opRules = new LinkedHashMap(); - MapWorkVectorizationNodeProcessor vnp = new MapWorkVectorizationNodeProcessor(mapWork, isTez); - addMapWorkRules(opRules, vnp); - Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); - GraphWalker ogw = new PreOrderOnceWalker(disp); - // iterator the mapper operator tree - ArrayList topNodes = new ArrayList(); - topNodes.addAll(mapWork.getAliasToWork().values()); - HashMap nodeOutput = new HashMap(); - ogw.startWalking(topNodes, nodeOutput); - - mapWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap()); - mapWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap()); - mapWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap()); - - if (LOG.isDebugEnabled()) { - debugDisplayAllMaps(mapWork); - } - - return; - } - - private void convertReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException { - boolean ret = validateReduceWork(reduceWork); - if (ret) { - vectorizeReduceWork(reduceWork, isTez); - } - } - - private boolean getOnlyStructObjectInspectors(ReduceWork reduceWork) throws SemanticException { - try { - // Check key ObjectInspector. - ObjectInspector keyObjectInspector = reduceWork.getKeyObjectInspector(); - if (keyObjectInspector == null || !(keyObjectInspector instanceof StructObjectInspector)) { - return false; - } - StructObjectInspector keyStructObjectInspector = (StructObjectInspector)keyObjectInspector; - List keyFields = keyStructObjectInspector.getAllStructFieldRefs(); - - // Tez doesn't use tagging... - if (reduceWork.getNeedsTagging()) { - return false; - } - - // Check value ObjectInspector. - ObjectInspector valueObjectInspector = reduceWork.getValueObjectInspector(); - if (valueObjectInspector == null || - !(valueObjectInspector instanceof StructObjectInspector)) { - return false; - } - StructObjectInspector valueStructObjectInspector = (StructObjectInspector)valueObjectInspector; - List valueFields = valueStructObjectInspector.getAllStructFieldRefs(); - - reduceColumnNames = new ArrayList(); - reduceTypeInfos = new ArrayList(); - - for (StructField field: keyFields) { - reduceColumnNames.add(Utilities.ReduceField.KEY.toString() + "." 
+ field.getFieldName()); - reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName())); - } - for (StructField field: valueFields) { - reduceColumnNames.add(Utilities.ReduceField.VALUE.toString() + "." + field.getFieldName()); - reduceTypeInfos.add(TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName())); - } - } catch (Exception e) { - throw new SemanticException(e); - } - return true; - } - - private void addReduceWorkRules(Map opRules, NodeProcessor np) { - opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + ".*"), np); - opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + ".*"), np); - } - - private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticException { - LOG.info("Validating ReduceWork..."); - - // Validate input to ReduceWork. - if (!getOnlyStructObjectInspectors(reduceWork)) { - return false; - } - // Now check the reduce operator tree. - Map opRules = new LinkedHashMap(); - ReduceWorkValidationNodeProcessor vnp = new ReduceWorkValidationNodeProcessor(); - addReduceWorkRules(opRules, vnp); - Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); - GraphWalker ogw = new DefaultGraphWalker(disp); - // iterator the reduce operator tree - ArrayList topNodes = new ArrayList(); - topNodes.add(reduceWork.getReducer()); - HashMap nodeOutput = new HashMap(); - ogw.startWalking(topNodes, nodeOutput); - for (Node n : nodeOutput.keySet()) { - if (nodeOutput.get(n) != null) { - if (!((Boolean)nodeOutput.get(n)).booleanValue()) { - return false; - } - } - } - return true; - } - - private void vectorizeReduceWork(ReduceWork reduceWork, boolean isTez) throws SemanticException { - LOG.info("Vectorizing ReduceWork..."); - reduceWork.setVectorMode(true); - - // For some reason, the DefaultGraphWalker does not descend down from the reducer Operator as - // expected. We need to descend down, otherwise it breaks our algorithm that determines - // VectorizationContext... Do we use PreOrderWalker instead of DefaultGraphWalker. - Map opRules = new LinkedHashMap(); - ReduceWorkVectorizationNodeProcessor vnp = - new ReduceWorkVectorizationNodeProcessor(reduceColumnNames, reduceTypeInfos, isTez); - addReduceWorkRules(opRules, vnp); - Dispatcher disp = new DefaultRuleDispatcher(vnp, opRules, null); - GraphWalker ogw = new PreOrderWalker(disp); - // iterator the reduce operator tree - ArrayList topNodes = new ArrayList(); - topNodes.add(reduceWork.getReducer()); - LOG.info("vectorizeReduceWork reducer Operator: " + - reduceWork.getReducer().getName() + "..."); - HashMap nodeOutput = new HashMap(); - ogw.startWalking(topNodes, nodeOutput); - - // Necessary since we are vectorizing the root operator in reduce. - reduceWork.setReducer(vnp.getRootVectorOp()); - - reduceWork.setVectorColumnNameMap(vnp.getVectorColumnNameMap()); - reduceWork.setVectorColumnTypeMap(vnp.getVectorColumnTypeMap()); - reduceWork.setVectorScratchColumnTypeMap(vnp.getVectorScratchColumnTypeMap()); - - if (LOG.isDebugEnabled()) { - debugDisplayAllMaps(reduceWork); - } - } - } - - class MapWorkValidationNodeProcessor implements NodeProcessor { - - private final MapWork mapWork; - private final boolean isTez; - - public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) { - this.mapWork = mapWork; - this.isTez = isTez; - } - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... 
nodeOutputs) throws SemanticException { - for (Node n : stack) { - Operator op = (Operator) n; - if (nonVectorizableChildOfGroupBy(op)) { - return new Boolean(true); - } - boolean ret = validateMapWorkOperator(op, mapWork, isTez); - if (!ret) { - LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized."); - return new Boolean(false); - } - } - return new Boolean(true); - } - } - - class ReduceWorkValidationNodeProcessor implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - for (Node n : stack) { - Operator op = (Operator) n; - if (nonVectorizableChildOfGroupBy(op)) { - return new Boolean(true); - } - boolean ret = validateReduceWorkOperator(op); - if (!ret) { - LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized."); - return new Boolean(false); - } - } - return new Boolean(true); - } - } - - // This class has common code used by both MapWorkVectorizationNodeProcessor and - // ReduceWorkVectorizationNodeProcessor. - class VectorizationNodeProcessor implements NodeProcessor { - - // The vectorization context for the Map or Reduce task. - protected VectorizationContext taskVectorizationContext; - - // The input projection column type name map for the Map or Reduce task. - protected Map taskColumnTypeNameMap; - - VectorizationNodeProcessor() { - taskColumnTypeNameMap = new HashMap(); - } - - public Map getVectorColumnNameMap() { - return taskVectorizationContext.getProjectionColumnMap(); - } - - public Map getVectorColumnTypeMap() { - return taskColumnTypeNameMap; - } - - public Map getVectorScratchColumnTypeMap() { - return taskVectorizationContext.getScratchColumnTypeMap(); - } - - protected final Set> opsDone = - new HashSet>(); - - protected final Map, Operator> opToVectorOpMap = - new HashMap, Operator>(); - - public VectorizationContext walkStackToFindVectorizationContext(Stack stack, - Operator op) throws SemanticException { - VectorizationContext vContext = null; - if (stack.size() <= 1) { - throw new SemanticException( - String.format("Expected operator stack for operator %s to have at least 2 operators", - op.getName())); - } - // Walk down the stack of operators until we found one willing to give us a context. 
- // At the bottom will be the root operator, guaranteed to have a context - int i= stack.size()-2; - while (vContext == null) { - if (i < 0) { - return null; - } - Operator opParent = (Operator) stack.get(i); - Operator vectorOpParent = opToVectorOpMap.get(opParent); - if (vectorOpParent != null) { - if (vectorOpParent instanceof VectorizationContextRegion) { - VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOpParent; - vContext = vcRegion.getOuputVectorizationContext(); - LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " has new vectorization context " + vContext.toString()); - } else { - LOG.info("walkStackToFindVectorizationContext " + vectorOpParent.getName() + " does not have new vectorization context"); - } - } else { - LOG.info("walkStackToFindVectorizationContext " + opParent.getName() + " is not vectorized"); - } - --i; - } - return vContext; - } - - public Operator doVectorize(Operator op, - VectorizationContext vContext, boolean isTez) throws SemanticException { - Operator vectorOp = op; - try { - if (!opsDone.contains(op)) { - vectorOp = vectorizeOperator(op, vContext, isTez); - opsDone.add(op); - if (vectorOp != op) { - opToVectorOpMap.put(op, vectorOp); - opsDone.add(vectorOp); - } - } - } catch (HiveException e) { - throw new SemanticException(e); - } - return vectorOp; - } - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - throw new SemanticException("Must be overridden"); - } - } - - class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor { - - private final boolean isTez; - - public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez) { - super(); - this.isTez = isTez; - } - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - - Operator op = (Operator) nd; - - VectorizationContext vContext = null; - - if (op instanceof TableScanOperator) { - if (taskVectorizationContext == null) { - taskVectorizationContext = getVectorizationContext(op.getSchema(), op.getName(), - taskColumnTypeNameMap); - } - vContext = taskVectorizationContext; - } else { - LOG.info("MapWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName()); - vContext = walkStackToFindVectorizationContext(stack, op); - if (vContext == null) { - // No operator has "pushed" a new context -- so use the task vectorization context. - vContext = taskVectorizationContext; - } - } - - assert vContext != null; - LOG.info("MapWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString()); - - // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't - // vectorize the operators below it. 
-
-  class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
-
-    private final boolean isTez;
-
-    public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez) {
-      super();
-      this.isTez = isTez;
-    }
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-
-      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
-
-      VectorizationContext vContext = null;
-
-      if (op instanceof TableScanOperator) {
-        if (taskVectorizationContext == null) {
-          taskVectorizationContext = getVectorizationContext(op.getSchema(), op.getName(),
-              taskColumnTypeNameMap);
-        }
-        vContext = taskVectorizationContext;
-      } else {
-        LOG.info("MapWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName());
-        vContext = walkStackToFindVectorizationContext(stack, op);
-        if (vContext == null) {
-          // No operator has "pushed" a new context -- so use the task vectorization context.
-          vContext = taskVectorizationContext;
-        }
-      }
-
-      assert vContext != null;
-      LOG.info("MapWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString());
-
-      // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't
-      // vectorize the operators below it.
-      if (nonVectorizableChildOfGroupBy(op)) {
-        // No need to vectorize
-        if (!opsDone.contains(op)) {
-          opsDone.add(op);
-        }
-        return null;
-      }
-
-      Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
-
-      if (LOG.isDebugEnabled()) {
-        if (vectorOp instanceof VectorizationContextRegion) {
-          VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
-          VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext();
-          LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString());
-        }
-      }
-
-      return null;
-    }
-  }
-
-  class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
-
-    private final List<String> reduceColumnNames;
-    private final List<TypeInfo> reduceTypeInfos;
-
-    private final boolean isTez;
-
-    private Operator<? extends OperatorDesc> rootVectorOp;
-
-    public Operator<? extends OperatorDesc> getRootVectorOp() {
-      return rootVectorOp;
-    }
-
-    public ReduceWorkVectorizationNodeProcessor(List<String> reduceColumnNames,
-        List<TypeInfo> reduceTypeInfos, boolean isTez) {
-      super();
-      this.reduceColumnNames = reduceColumnNames;
-      this.reduceTypeInfos = reduceTypeInfos;
-      rootVectorOp = null;
-      this.isTez = isTez;
-    }
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-
-      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
-
-      VectorizationContext vContext = null;
-
-      boolean saveRootVectorOp = false;
-
-      if (op.getParentOperators().size() == 0) {
-        LOG.info("ReduceWorkVectorizationNodeProcessor process reduceColumnNames " + reduceColumnNames.toString());
-
-        vContext = new VectorizationContext("__Reduce_Shuffle__", reduceColumnNames);
-        taskVectorizationContext = vContext;
-        int i = 0;
-        for (TypeInfo typeInfo : reduceTypeInfos) {
-          taskColumnTypeNameMap.put(i, typeInfo.getTypeName());
-          i++;
-        }
-        saveRootVectorOp = true;
-
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Vectorized ReduceWork reduce shuffle vectorization context " + vContext.toString());
-        }
-      } else {
-        LOG.info("ReduceWorkVectorizationNodeProcessor process going to walk the operator stack to get vectorization context for " + op.getName());
-        vContext = walkStackToFindVectorizationContext(stack, op);
-        if (vContext == null) {
-          // If we didn't find a context among the operators, assume the top -- reduce shuffle's
-          // vectorization context.
-          vContext = taskVectorizationContext;
-        }
-      }
-
-      assert vContext != null;
-      LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString());
-
-      // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't
-      // vectorize the operators below it.
-      if (nonVectorizableChildOfGroupBy(op)) {
-        // No need to vectorize
-        if (!opsDone.contains(op)) {
-          opsDone.add(op);
-        }
-        return null;
-      }
-
-      Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
-
-      if (LOG.isDebugEnabled()) {
-        if (vectorOp instanceof VectorizationContextRegion) {
-          VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
-          VectorizationContext vNewContext = vcRegion.getOuputVectorizationContext();
-          LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vNewContext.toString());
-        }
-      }
-      if (saveRootVectorOp && op != vectorOp) {
-        rootVectorOp = vectorOp;
-      }
-
-      return null;
-    }
-  }
-
-  private static class ValidatorVectorizationContext extends VectorizationContext {
-    private ValidatorVectorizationContext() {
-      super("No Name");
-    }
-
-    @Override
-    protected int getInputColumnIndex(String name) {
-      return 0;
-    }
-
-    @Override
-    protected int getInputColumnIndex(ExprNodeColumnDesc colExpr) {
-      return 0;
-    }
-  }
-
-  @Override
-  public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticException {
-    hiveConf = physicalContext.getConf();
-
-    boolean vectorPath = HiveConf.getBoolVar(hiveConf,
-        HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED);
-    if (!vectorPath) {
-      LOG.info("Vectorization is disabled");
-      return physicalContext;
-    }
-    // create dispatcher and graph walker
-    Dispatcher disp = new VectorizationDispatcher(physicalContext);
-    TaskGraphWalker ogw = new TaskGraphWalker(disp);
-
-    // get all the tasks nodes from root task
-    ArrayList<Node> topNodes = new ArrayList<Node>();
-    topNodes.addAll(physicalContext.getRootTasks());
-
-    // begin to walk through the task tree.
-    ogw.startWalking(topNodes, null);
-    return physicalContext;
-  }
-
-  boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, MapWork mWork, boolean isTez) {
-    boolean ret = false;
-    switch (op.getType()) {
-      case MAPJOIN:
-        if (op instanceof MapJoinOperator) {
-          ret = validateMapJoinOperator((MapJoinOperator) op);
-        } else if (op instanceof SMBMapJoinOperator) {
-          ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
-        }
-        break;
-      case GROUPBY:
-        ret = validateGroupByOperator((GroupByOperator) op, false, isTez);
-        break;
-      case FILTER:
-        ret = validateFilterOperator((FilterOperator) op);
-        break;
-      case SELECT:
-        ret = validateSelectOperator((SelectOperator) op);
-        break;
-      case REDUCESINK:
-        ret = validateReduceSinkOperator((ReduceSinkOperator) op);
-        break;
-      case TABLESCAN:
-        ret = validateTableScanOperator((TableScanOperator) op, mWork);
-        break;
-      case FILESINK:
-      case LIMIT:
-      case EVENT:
-      case SPARKPRUNINGSINK:
-        ret = true;
-        break;
-      case HASHTABLESINK:
-        ret = op instanceof SparkHashTableSinkOperator &&
-            validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op);
-        break;
-      default:
-        ret = false;
-        break;
-    }
-    return ret;
-  }
-
-  boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
-    boolean ret = false;
-    switch (op.getType()) {
-      case MAPJOIN:
-        // Does MAPJOIN actually get planned in Reduce?
-        if (op instanceof MapJoinOperator) {
-          ret = validateMapJoinOperator((MapJoinOperator) op);
-        } else if (op instanceof SMBMapJoinOperator) {
-          ret = validateSMBMapJoinOperator((SMBMapJoinOperator) op);
-        }
-        break;
-      case GROUPBY:
-        if (HiveConf.getBoolVar(hiveConf,
-            HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) {
-          ret = validateGroupByOperator((GroupByOperator) op, true, true);
-        } else {
-          ret = false;
-        }
-        break;
-      case FILTER:
-        ret = validateFilterOperator((FilterOperator) op);
-        break;
-      case SELECT:
-        ret = validateSelectOperator((SelectOperator) op);
-        break;
-      case REDUCESINK:
-        ret = validateReduceSinkOperator((ReduceSinkOperator) op);
-        break;
-      case FILESINK:
-        ret = validateFileSinkOperator((FileSinkOperator) op);
-        break;
-      case LIMIT:
-      case EVENT:
-      case SPARKPRUNINGSINK:
-        ret = true;
-        break;
-      case HASHTABLESINK:
-        ret = op instanceof SparkHashTableSinkOperator &&
-            validateSparkHashTableSinkOperator((SparkHashTableSinkOperator) op);
-        break;
-      default:
-        ret = false;
-        break;
-    }
-    return ret;
-  }
-
-  public Boolean nonVectorizableChildOfGroupBy(Operator<? extends OperatorDesc> op) {
-    Operator<? extends OperatorDesc> currentOp = op;
-    while (currentOp.getParentOperators().size() > 0) {
-      currentOp = currentOp.getParentOperators().get(0);
-      if (currentOp.getType().equals(OperatorType.GROUPBY)) {
-        GroupByDesc desc = (GroupByDesc) currentOp.getConf();
-        boolean isVectorOutput = desc.getVectorDesc().isVectorOutput();
-        if (isVectorOutput) {
-          // This GROUP BY does vectorize its output.
-          return false;
-        }
-        return true;
-      }
-    }
-    return false;
-  }
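Two details of nonVectorizableChildOfGroupBy are easy to miss: it only follows the first parent at each step, and a GROUP BY ancestor blocks vectorization below it only when that GROUP BY emits rows rather than vectorized batches. A toy restatement in plain Java (not Hive's operator classes) of the same walk:

  // Toy version of nonVectorizableChildOfGroupBy: climb the first-parent chain;
  // if we hit a GROUP BY, its output mode decides whether descendants can be
  // vectorized (a row-mode GROUP BY cannot feed vectorized children).
  final class GroupByChildCheck {
    static final class Op {
      Op parent;             // Hive walks getParentOperators().get(0)
      boolean isGroupBy;
      boolean vectorOutput;  // GroupByDesc.getVectorDesc().isVectorOutput()
    }

    static boolean nonVectorizableChildOfGroupBy(Op op) {
      for (Op cur = op.parent; cur != null; cur = cur.parent) {
        if (cur.isGroupBy) {
          return !cur.vectorOutput;  // rows-out GROUP BY blocks vectorization below
        }
      }
      return false;                  // no GROUP BY ancestor found
    }
  }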
-
-  private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) {
-    SMBJoinDesc desc = op.getConf();
-    // Validation is the same as for map join, since the 'small' tables are not vectorized
-    return validateMapJoinDesc(desc);
-  }
-
-  private boolean validateTableScanOperator(TableScanOperator op, MapWork mWork) {
-    TableScanDesc desc = op.getConf();
-    if (desc.isGatherStats()) {
-      return false;
-    }
-
-    String columns = "";
-    String types = "";
-    String partitionColumns = "";
-    String partitionTypes = "";
-    boolean haveInfo = false;
-
-    // This over-reaches slightly, since we can have > 1 table-scan per map-work.
-    // It needs path to partition, path to alias, then check the alias == the same table-scan, to be accurate.
-    // That said, that is a TODO item to be fixed when we support >1 TableScans per vectorized pipeline later.
-    LinkedHashMap<String, PartitionDesc> partitionDescs = mWork.getPathToPartitionInfo();
-
-    // For vectorization, compare each partition information against the others.
-    // We assume the table information will be from one of the partitions, so it will
-    // work to focus on the partition information and not compare against the TableScanOperator
-    // columns (in the VectorizationContext)....
-    for (Map.Entry<String, PartitionDesc> entry : partitionDescs.entrySet()) {
-      PartitionDesc partDesc = entry.getValue();
-      if (partDesc.getPartSpec() == null || partDesc.getPartSpec().isEmpty()) {
-        // No partition information -- we match because we would default to using the table description.
-        continue;
-      }
-      Properties partProps = partDesc.getProperties();
-      if (!haveInfo) {
-        columns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
-        types = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
-        partitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
-        partitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
-        haveInfo = true;
-      } else {
-        String nextColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMNS);
-        String nextTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_COLUMN_TYPES);
-        String nextPartitionColumns = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
-        String nextPartitionTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
-        if (!columns.equalsIgnoreCase(nextColumns)) {
-          LOG.info(
-              String.format("Could not vectorize partition %s. Its column names %s do not match the other column names %s",
-                  entry.getKey(), nextColumns, columns));
-          return false;
-        }
-        if (!types.equalsIgnoreCase(nextTypes)) {
-          LOG.info(
-              String.format("Could not vectorize partition %s. Its column types %s do not match the other column types %s",
-                  entry.getKey(), nextTypes, types));
-          return false;
-        }
-        if (!partitionColumns.equalsIgnoreCase(nextPartitionColumns)) {
-          LOG.info(
-              String.format("Could not vectorize partition %s. Its partition column names %s do not match the other partition column names %s",
-                  entry.getKey(), nextPartitionColumns, partitionColumns));
-          return false;
-        }
-        if (!partitionTypes.equalsIgnoreCase(nextPartitionTypes)) {
-          LOG.info(
-              String.format("Could not vectorize partition %s. Its partition column types %s do not match the other partition column types %s",
-                  entry.getKey(), nextPartitionTypes, partitionTypes));
-          return false;
-        }
-      }
-    }
-    return true;
-  }
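The loop above is a "first schema wins" consistency check: the first real partition's column and type strings become the baseline, and any later partition that disagrees (case-insensitively) disqualifies the whole scan from vectorization. The same pattern in isolation, with each partition reduced to an array of schema strings:

  import java.util.List;

  // Toy version of the partition-consistency check in validateTableScanOperator:
  // remember the first partition's schema strings, then require every other
  // partition to match them case-insensitively.
  final class PartitionSchemaCheck {
    static boolean allPartitionsAgree(List<String[]> partitionSchemas) {
      String[] first = null;
      for (String[] schema : partitionSchemas) {  // e.g. {columns, columnTypes, ...}
        if (first == null) {
          first = schema;
          continue;
        }
        for (int i = 0; i < first.length; i++) {
          if (!first[i].equalsIgnoreCase(schema[i])) {
            return false;                          // mismatch: do not vectorize
          }
        }
      }
      return true;
    }
  }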
-
-  private boolean validateMapJoinOperator(MapJoinOperator op) {
-    MapJoinDesc desc = op.getConf();
-    return validateMapJoinDesc(desc);
-  }
-
-  private boolean validateMapJoinDesc(MapJoinDesc desc) {
-    byte posBigTable = (byte) desc.getPosBigTable();
-    List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
-    if (!validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER)) {
-      LOG.info("Cannot vectorize map work filter expression");
-      return false;
-    }
-    List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
-    if (!validateExprNodeDesc(keyExprs)) {
-      LOG.info("Cannot vectorize map work key expression");
-      return false;
-    }
-    List<ExprNodeDesc> valueExprs = desc.getExprs().get(posBigTable);
-    if (!validateExprNodeDesc(valueExprs)) {
-      LOG.info("Cannot vectorize map work value expression");
-      return false;
-    }
-    return true;
-  }
-
-  private boolean validateSparkHashTableSinkOperator(SparkHashTableSinkOperator op) {
-    SparkHashTableSinkDesc desc = op.getConf();
-    byte tag = desc.getTag();
-    // it's essentially a MapJoinDesc
-    List<ExprNodeDesc> filterExprs = desc.getFilters().get(tag);
-    List<ExprNodeDesc> keyExprs = desc.getKeys().get(tag);
-    List<ExprNodeDesc> valueExprs = desc.getExprs().get(tag);
-    return validateExprNodeDesc(filterExprs, VectorExpressionDescriptor.Mode.FILTER) &&
-        validateExprNodeDesc(keyExprs) && validateExprNodeDesc(valueExprs);
-  }
-
-  private boolean validateReduceSinkOperator(ReduceSinkOperator op) {
-    List<ExprNodeDesc> keyDescs = op.getConf().getKeyCols();
-    List<ExprNodeDesc> partitionDescs = op.getConf().getPartitionCols();
-    List<ExprNodeDesc> valueDesc = op.getConf().getValueCols();
-    return validateExprNodeDesc(keyDescs) &&
-        validateExprNodeDesc(partitionDescs) &&
-        validateExprNodeDesc(valueDesc);
-  }
-
-  private boolean validateSelectOperator(SelectOperator op) {
-    List<ExprNodeDesc> descList = op.getConf().getColList();
-    for (ExprNodeDesc desc : descList) {
-      boolean ret = validateExprNodeDesc(desc);
-      if (!ret) {
-        LOG.info("Cannot vectorize select expression: " + desc.toString());
-        return false;
-      }
-    }
-    return true;
-  }
-
-  private boolean validateFilterOperator(FilterOperator op) {
-    ExprNodeDesc desc = op.getConf().getPredicate();
-    return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.FILTER);
-  }
-
-  private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTez) {
-    GroupByDesc desc = op.getConf();
-    VectorGroupByDesc vectorDesc = desc.getVectorDesc();
-
-    if (desc.isGroupingSetsPresent()) {
-      LOG.info("Grouping sets not supported in vector mode");
-      return false;
-    }
-    if (desc.pruneGroupingSetId()) {
-      LOG.info("Pruning grouping set id not supported in vector mode");
-      return false;
-    }
-    boolean ret = validateExprNodeDesc(desc.getKeys());
-    if (!ret) {
-      LOG.info("Cannot vectorize groupby key expression");
-      return false;
-    }
-
-    if (!isReduce) {
-
-      // MapWork
-
-      ret = validateHashAggregationDesc(desc.getAggregators());
-      if (!ret) {
-        return false;
-      }
-    } else {
-
-      // ReduceWork
-
-      boolean isComplete = desc.getMode() == GroupByDesc.Mode.COMPLETE;
-      if (desc.getMode() != GroupByDesc.Mode.HASH) {
-
-        // Reduce Merge-Partial GROUP BY.
-
-        // A merge-partial GROUP BY is fed by grouping by keys from reduce-shuffle. It is the
-        // first (or root) operator for its reduce task.
-        // TODO: Technically, we should also handle FINAL, PARTIAL1, PARTIAL2 and PARTIALS
-        // that are not hash or complete, but aren't merge-partial, somehow.
-
-        if (desc.isDistinct()) {
-          LOG.info("Vectorized Reduce MergePartial GROUP BY does not support DISTINCT");
-          return false;
-        }
-
-        boolean hasKeys = (desc.getKeys().size() > 0);
-
-        // Do we support merge-partial aggregation AND the output is primitive?
-        ret = validateReduceMergePartialAggregationDesc(desc.getAggregators(), hasKeys);
-        if (!ret) {
-          return false;
-        }
-
-        if (hasKeys) {
-          if (op.getParentOperators().size() > 0 && !isComplete) {
-            LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle a key group when it is fed by reduce-shuffle");
-            return false;
-          }
-
-          LOG.info("Vectorized Reduce MergePartial GROUP BY will process key groups");
-
-          // Primitive output validation above means we can output VectorizedRowBatch to the
-          // children operators.
-          vectorDesc.setVectorOutput(true);
-        } else {
-          LOG.info("Vectorized Reduce MergePartial GROUP BY will do global aggregation");
-        }
-        if (!isComplete) {
-          vectorDesc.setIsReduceMergePartial(true);
-        } else {
-          vectorDesc.setIsReduceStreaming(true);
-        }
-      } else {
-
-        // Reduce Hash GROUP BY or global aggregation.
-
-        ret = validateHashAggregationDesc(desc.getAggregators());
-        if (!ret) {
-          return false;
-        }
-      }
-    }
-
-    return true;
-  }
-
-  private boolean validateFileSinkOperator(FileSinkOperator op) {
-    return true;
-  }
-
-  private boolean validateExprNodeDesc(List<ExprNodeDesc> descs) {
-    return validateExprNodeDesc(descs, VectorExpressionDescriptor.Mode.PROJECTION);
-  }
-
-  private boolean validateExprNodeDesc(List<ExprNodeDesc> descs,
-      VectorExpressionDescriptor.Mode mode) {
-    for (ExprNodeDesc d : descs) {
-      boolean ret = validateExprNodeDesc(d, mode);
-      if (!ret) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-
-  private boolean validateHashAggregationDesc(List<AggregationDesc> descs) {
-    return validateAggregationDesc(descs, /* isReduceMergePartial */ false, false);
-  }
-
-  private boolean validateReduceMergePartialAggregationDesc(List<AggregationDesc> descs, boolean hasKeys) {
-    return validateAggregationDesc(descs, /* isReduceMergePartial */ true, hasKeys);
-  }
-
-  private boolean validateAggregationDesc(List<AggregationDesc> descs, boolean isReduceMergePartial, boolean hasKeys) {
-    for (AggregationDesc d : descs) {
-      boolean ret = validateAggregationDesc(d, isReduceMergePartial, hasKeys);
-      if (!ret) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, VectorExpressionDescriptor.Mode mode) {
-    if (desc instanceof ExprNodeColumnDesc) {
-      ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc;
-      // Currently, we do not support vectorized virtual columns (see HIVE-5570).
-      if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(c.getColumn())) {
-        LOG.info("Cannot vectorize virtual column " + c.getColumn());
-        return false;
-      }
-    }
-    String typeName = desc.getTypeInfo().getTypeName();
-    boolean ret = validateDataType(typeName, mode);
-    if (!ret) {
-      LOG.info("Cannot vectorize " + desc.toString() + " of type " + typeName);
-      return false;
-    }
-    if (desc instanceof ExprNodeGenericFuncDesc) {
-      ExprNodeGenericFuncDesc d = (ExprNodeGenericFuncDesc) desc;
-      boolean r = validateGenericUdf(d);
-      if (!r) {
-        LOG.info("Cannot vectorize UDF " + d);
-        return false;
-      }
-    }
-    if (desc.getChildren() != null) {
-      for (ExprNodeDesc d : desc.getChildren()) {
-        // Don't restrict child expressions for projection. Always use looser FILTER mode.
-        boolean r = validateExprNodeDescRecursive(d, VectorExpressionDescriptor.Mode.FILTER);
-        if (!r) {
-          return false;
-        }
-      }
-    }
-    return true;
-  }
-
-  private boolean validateExprNodeDesc(ExprNodeDesc desc) {
-    return validateExprNodeDesc(desc, VectorExpressionDescriptor.Mode.PROJECTION);
-  }
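Note the asymmetry in validateExprNodeDescRecursive: the requested mode applies only to the top node, while children are always validated with the looser FILTER mode (the inline comment calls this out, so a projection of a comparison does not reject its operands). A toy sketch of that shape, with a stand-in supported() check in place of Hive's type and UDF validation:

  // Toy version of validateExprNodeDescRecursive's shape: validate this node
  // against the requested mode, but validate children with FILTER mode.
  final class ExprCheck {
    enum Mode { FILTER, PROJECTION }

    static class Expr {
      Expr[] children = new Expr[0];
      boolean supported(Mode mode) { return true; }  // stand-in for type/UDF checks
    }

    static boolean validate(Expr e, Mode mode) {
      if (!e.supported(mode)) {
        return false;
      }
      for (Expr child : e.children) {
        if (!validate(child, Mode.FILTER)) {  // children use FILTER on purpose
          return false;
        }
      }
      return true;
    }
  }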
- LOG.info("getVectorExpression returned null"); - return false; - } - } catch (Exception e) { - LOG.info("Failed to vectorize", e); - return false; - } - return true; - } - - private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { - if (VectorizationContext.isCustomUDF(genericUDFExpr)) { - return true; - } - GenericUDF genericUDF = genericUDFExpr.getGenericUDF(); - if (genericUDF instanceof GenericUDFBridge) { - Class udf = ((GenericUDFBridge) genericUDF).getUdfClass(); - return supportedGenericUDFs.contains(udf); - } else { - return supportedGenericUDFs.contains(genericUDF.getClass()); - } - } - - private boolean validateAggregationIsPrimitive(VectorAggregateExpression vectorAggrExpr) { - ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector(); - return (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE); - } - - private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduceMergePartial, - boolean hasKeys) { - - String udfName = aggDesc.getGenericUDAFName().toLowerCase(); - if (!supportedAggregationUdfs.contains(udfName)) { - LOG.info("Cannot vectorize groupby aggregate expression: UDF " + udfName + " not supported"); - return false; - } - if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) { - LOG.info("Cannot vectorize groupby aggregate expression: UDF parameters not supported"); - return false; - } - - // See if we can vectorize the aggregation. - VectorizationContext vc = new ValidatorVectorizationContext(); - VectorAggregateExpression vectorAggrExpr; - try { - vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduceMergePartial); - } catch (Exception e) { - // We should have already attempted to vectorize in validateAggregationDesc. - LOG.info("Vectorization of aggreation should have succeeded ", e); - return false; - } - - if (isReduceMergePartial && hasKeys && !validateAggregationIsPrimitive(vectorAggrExpr)) { - LOG.info("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types"); - return false; - } - - return true; - } - - private boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) { - type = type.toLowerCase(); - boolean result = supportedDataTypesPattern.matcher(type).matches(); - if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) { - return false; - } - return result; - } - - private VectorizationContext getVectorizationContext(RowSchema rowSchema, String contextName, - Map typeNameMap) { - - VectorizationContext vContext = new VectorizationContext(contextName); - - // Add all non-virtual columns to make a vectorization context for - // the TableScan operator. - int i = 0; - for (ColumnInfo c : rowSchema.getSignature()) { - // Earlier, validation code should have eliminated virtual columns usage (HIVE-5560). 
-      if (!isVirtualColumn(c)) {
-        vContext.addInitialColumn(c.getInternalName());
-        typeNameMap.put(i, c.getTypeName());
-        i++;
-      }
-    }
-    vContext.finishedAddingInitialColumns();
-
-    return vContext;
-  }
-
-  private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
-      Operator<? extends OperatorDesc> vectorOp) {
-    if (op.getParentOperators() != null) {
-      vectorOp.setParentOperators(op.getParentOperators());
-      for (Operator<? extends OperatorDesc> p : op.getParentOperators()) {
-        p.replaceChild(op, vectorOp);
-      }
-    }
-    if (op.getChildOperators() != null) {
-      vectorOp.setChildOperators(op.getChildOperators());
-      for (Operator<? extends OperatorDesc> c : op.getChildOperators()) {
-        c.replaceParent(op, vectorOp);
-      }
-    }
-  }
-
-  private boolean isBigTableOnlyResults(MapJoinDesc desc) {
-    Byte[] order = desc.getTagOrder();
-    byte posBigTable = (byte) desc.getPosBigTable();
-    Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
-
-    int[] smallTableIndices;
-    int smallTableIndicesSize;
-    if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) {
-      smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable);
-      LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices " + Arrays.toString(smallTableIndices));
-      smallTableIndicesSize = smallTableIndices.length;
-    } else {
-      smallTableIndices = null;
-      LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices EMPTY");
-      smallTableIndicesSize = 0;
-    }
-
-    List<Integer> smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable);
-    LOG.info("Vectorizer isBigTableOnlyResults smallTableRetainList " + smallTableRetainList);
-    int smallTableRetainSize = smallTableRetainList.size();
-
-    if (smallTableIndicesSize > 0) {
-      // Small table indices has priority over retain.
-      for (int i = 0; i < smallTableIndicesSize; i++) {
-        if (smallTableIndices[i] < 0) {
-          // Negative numbers indicate a column to be (deserialize) read from the small table's
-          // LazyBinary value row.
-          LOG.info("Vectorizer isBigTableOnlyResults smallTableIndices[i] < 0 returning false");
-          return false;
-        }
-      }
-    } else if (smallTableRetainSize > 0) {
-      LOG.info("Vectorizer isBigTableOnlyResults smallTableRetainSize > 0 returning false");
-      return false;
-    }
-
-    LOG.info("Vectorizer isBigTableOnlyResults returning true");
-    return true;
-  }
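Read isBigTableOnlyResults as a question about the join's projection: a non-negative small-table index points back at a big-table column, while a negative index (or any retained small-table column) means a real small-table value column would have to be deserialized, so the result is not big-table-only. A compact restatement of just that decision:

  // Toy reading of isBigTableOnlyResults: a small-table index >= 0 refers back
  // to a big-table column; a negative index means a genuine small-table value
  // column must be deserialized, so the join result is not big-table-only.
  final class BigTableOnlyCheck {
    static boolean isBigTableOnly(int[] smallTableIndices, int smallTableRetainSize) {
      if (smallTableIndices != null && smallTableIndices.length > 0) {
        for (int idx : smallTableIndices) {
          if (idx < 0) {
            return false;  // small-table value column needed
          }
        }
        return true;
      }
      // No indices: any retained small-table column also disqualifies.
      return smallTableRetainSize == 0;
    }
  }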
-
-  Operator<? extends OperatorDesc> specializeMapJoinOperator(Operator<? extends OperatorDesc> op,
-      VectorizationContext vContext, MapJoinDesc desc) throws HiveException {
-    Operator<? extends OperatorDesc> vectorOp = null;
-    Class<? extends Operator<?>> opClass = null;
-
-    VectorMapJoinDesc.HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE;
-    VectorMapJoinDesc.HashTableKind hashTableKind = HashTableKind.NONE;
-    VectorMapJoinDesc.HashTableKeyType hashTableKeyType = HashTableKeyType.NONE;
-
-    if (HiveConf.getBoolVar(hiveConf,
-        HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
-      hashTableImplementationType = HashTableImplementationType.FAST;
-    } else {
-      // Restrict to using BytesBytesMultiHashMap via MapJoinBytesTableContainer or
-      // HybridHashTableContainer.
-      hashTableImplementationType = HashTableImplementationType.OPTIMIZED;
-    }
-
-    int joinType = desc.getConds()[0].getType();
-
-    boolean isInnerBigOnly = false;
-    if (joinType == JoinDesc.INNER_JOIN && isBigTableOnlyResults(desc)) {
-      isInnerBigOnly = true;
-    }
-
-    // By default, we can always use the multi-key class.
-    hashTableKeyType = HashTableKeyType.MULTI_KEY;
-
-    if (!HiveConf.getBoolVar(hiveConf,
-        HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED)) {
-
-      // Look for single column optimization.
-      byte posBigTable = (byte) desc.getPosBigTable();
-      Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
-      List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
-      if (bigTableKeyExprs.size() == 1) {
-        String typeName = bigTableKeyExprs.get(0).getTypeString();
-        LOG.info("Vectorizer vectorizeOperator map join typeName " + typeName);
-        if (typeName.equals("boolean")) {
-          hashTableKeyType = HashTableKeyType.BOOLEAN;
-        } else if (typeName.equals("tinyint")) {
-          hashTableKeyType = HashTableKeyType.BYTE;
-        } else if (typeName.equals("smallint")) {
-          hashTableKeyType = HashTableKeyType.SHORT;
-        } else if (typeName.equals("int")) {
-          hashTableKeyType = HashTableKeyType.INT;
-        } else if (typeName.equals("bigint") || typeName.equals("long")) {
-          hashTableKeyType = HashTableKeyType.LONG;
-        } else if (VectorizationContext.isStringFamily(typeName)) {
-          hashTableKeyType = HashTableKeyType.STRING;
-        }
-      }
-    }
-
-    switch (joinType) {
-      case JoinDesc.INNER_JOIN:
-        if (!isInnerBigOnly) {
-          hashTableKind = HashTableKind.HASH_MAP;
-        } else {
-          hashTableKind = HashTableKind.HASH_MULTISET;
-        }
-        break;
-      case JoinDesc.LEFT_OUTER_JOIN:
-      case JoinDesc.RIGHT_OUTER_JOIN:
-        hashTableKind = HashTableKind.HASH_MAP;
-        break;
-      case JoinDesc.LEFT_SEMI_JOIN:
-        hashTableKind = HashTableKind.HASH_SET;
-        break;
-      default:
-        throw new HiveException("Unknown join type " + joinType);
-    }
-
-    LOG.info("Vectorizer vectorizeOperator map join hashTableKind " + hashTableKind.name() + " hashTableKeyType " + hashTableKeyType.name());
-
-    switch (hashTableKeyType) {
-      case BOOLEAN:
-      case BYTE:
-      case SHORT:
-      case INT:
-      case LONG:
-        switch (joinType) {
-          case JoinDesc.INNER_JOIN:
-            if (!isInnerBigOnly) {
-              opClass = VectorMapJoinInnerLongOperator.class;
-            } else {
-              opClass = VectorMapJoinInnerBigOnlyLongOperator.class;
-            }
-            break;
-          case JoinDesc.LEFT_OUTER_JOIN:
-          case JoinDesc.RIGHT_OUTER_JOIN:
-            opClass = VectorMapJoinOuterLongOperator.class;
-            break;
-          case JoinDesc.LEFT_SEMI_JOIN:
-            opClass = VectorMapJoinLeftSemiLongOperator.class;
-            break;
-          default:
-            throw new HiveException("Unknown join type " + joinType);
-        }
-        break;
-      case STRING:
-        switch (joinType) {
-          case JoinDesc.INNER_JOIN:
-            if (!isInnerBigOnly) {
-              opClass = VectorMapJoinInnerStringOperator.class;
-            } else {
-              opClass = VectorMapJoinInnerBigOnlyStringOperator.class;
-            }
-            break;
-          case JoinDesc.LEFT_OUTER_JOIN:
-          case JoinDesc.RIGHT_OUTER_JOIN:
-            opClass = VectorMapJoinOuterStringOperator.class;
-            break;
-          case JoinDesc.LEFT_SEMI_JOIN:
-            opClass = VectorMapJoinLeftSemiStringOperator.class;
-            break;
-          default:
-            throw new HiveException("Unknown join type " + joinType);
-        }
-        break;
-      case MULTI_KEY:
-        switch (joinType) {
-          case JoinDesc.INNER_JOIN:
-            if (!isInnerBigOnly) {
-              opClass = VectorMapJoinInnerMultiKeyOperator.class;
-            } else {
-              opClass = VectorMapJoinInnerBigOnlyMultiKeyOperator.class;
-            }
-            break;
-          case JoinDesc.LEFT_OUTER_JOIN:
-          case JoinDesc.RIGHT_OUTER_JOIN:
-            opClass = VectorMapJoinOuterMultiKeyOperator.class;
-            break;
-          case JoinDesc.LEFT_SEMI_JOIN:
-            opClass = VectorMapJoinLeftSemiMultiKeyOperator.class;
-            break;
-          default:
-            throw new HiveException("Unknown join type " + joinType);
-        }
-        break;
-    }
-
-    vectorOp = OperatorFactory.getVectorOperator(opClass, op.getConf(), vContext);
-    LOG.info("Vectorizer vectorizeOperator map join class " +
-        vectorOp.getClass().getSimpleName());
-
-    boolean minMaxEnabled = HiveConf.getBoolVar(hiveConf,
-        HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED);
-
-    VectorMapJoinDesc vectorDesc = desc.getVectorDesc();
-    vectorDesc.setHashTableImplementationType(hashTableImplementationType);
-    vectorDesc.setHashTableKind(hashTableKind);
-    vectorDesc.setHashTableKeyType(hashTableKeyType);
-    vectorDesc.setMinMaxEnabled(minMaxEnabled);
-    return vectorOp;
-  }
-
-  private boolean onExpressionHasNullSafes(MapJoinDesc desc) {
-    boolean[] nullSafes = desc.getNullSafes();
-    for (boolean nullSafe : nullSafes) {
-      if (nullSafe) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  private boolean canSpecializeMapJoin(Operator<? extends OperatorDesc> op, MapJoinDesc desc,
-      boolean isTez) {
-
-    boolean specialize = false;
-
-    if (op instanceof MapJoinOperator &&
-        HiveConf.getBoolVar(hiveConf,
-            HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED)) {
-
-      // Currently, only under Tez and non-N-way joins.
-      if (isTez && desc.getConds().length == 1 && !onExpressionHasNullSafes(desc)) {
-
-        // Ok, all basic restrictions satisfied so far...
-        specialize = true;
-
-        if (!HiveConf.getBoolVar(hiveConf,
-            HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED)) {
-
-          // We are using the optimized hash table we have further
-          // restrictions (using optimized and key type).
-
-          if (!HiveConf.getBoolVar(hiveConf,
-              HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE)) {
-            specialize = false;
-          } else {
-            byte posBigTable = (byte) desc.getPosBigTable();
-            Map<Byte, List<ExprNodeDesc>> keyExprs = desc.getKeys();
-            List<ExprNodeDesc> bigTableKeyExprs = keyExprs.get(posBigTable);
-            for (ExprNodeDesc exprNodeDesc : bigTableKeyExprs) {
-              String typeName = exprNodeDesc.getTypeString();
-              if (!MapJoinKey.isSupportedField(typeName)) {
-                specialize = false;
-                break;
-              }
-            }
-          }
-        } else {
-
-          // With the fast hash table implementation, we currently do not support
-          // Hybrid Grace Hash Join.
-
-          if (HiveConf.getBoolVar(hiveConf,
-              HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)) {
-            specialize = false;
-          }
-        }
-      }
-    }
-    return specialize;
-  }
-
-  Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
-      VectorizationContext vContext, boolean isTez) throws HiveException {
-    Operator<? extends OperatorDesc> vectorOp = null;
-
-    switch (op.getType()) {
-      case MAPJOIN:
-        {
-          MapJoinDesc desc = (MapJoinDesc) op.getConf();
-          boolean specialize = canSpecializeMapJoin(op, desc, isTez);
-
-          if (!specialize) {
-
-            Class<? extends Operator<?>> opClass = null;
-            if (op instanceof MapJoinOperator) {
-
-              // *NON-NATIVE* vector map differences for LEFT OUTER JOIN and Filtered...
-
-              List<ExprNodeDesc> bigTableFilters = desc.getFilters().get((byte) desc.getPosBigTable());
-              boolean isOuterAndFiltered = (!desc.isNoOuterJoin() && bigTableFilters.size() > 0);
-              if (!isOuterAndFiltered) {
-                opClass = VectorMapJoinOperator.class;
-              } else {
-                opClass = VectorMapJoinOuterFilteredOperator.class;
-              }
-            } else if (op instanceof SMBMapJoinOperator) {
-              opClass = VectorSMBMapJoinOperator.class;
-            }
-
-            vectorOp = OperatorFactory.getVectorOperator(opClass, op.getConf(), vContext);
-
-          } else {
-
-            // TEMPORARY Until Native Vector Map Join with Hybrid passes tests...
-            // HiveConf.setBoolVar(physicalContext.getConf(),
-            //     HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN, false);
-
-            vectorOp = specializeMapJoinOperator(op, vContext, desc);
-          }
-        }
-        break;
-      case GROUPBY:
-      case FILTER:
-      case SELECT:
-      case FILESINK:
-      case REDUCESINK:
-      case LIMIT:
-      case EXTRACT:
-      case EVENT:
-      case HASHTABLESINK:
-        vectorOp = OperatorFactory.getVectorOperator(op.getConf(), vContext);
-        break;
-      default:
-        vectorOp = op;
-        break;
-    }
-
-    LOG.info("vectorizeOperator " + (vectorOp == null ? "NULL" : vectorOp.getClass().getName()));
-    LOG.info("vectorizeOperator " + (vectorOp == null || vectorOp.getConf() == null ? "NULL" : vectorOp.getConf().getClass().getName()));
-
-    if (vectorOp != op) {
-      fixupParentChildOperators(op, vectorOp);
-      ((AbstractOperatorDesc) vectorOp.getConf()).setVectorMode(true);
-    }
-    return vectorOp;
-  }
-
-  private boolean isVirtualColumn(ColumnInfo column) {
-
-    // Not using method column.getIsVirtualCol() because partitioning columns are also
-    // treated as virtual columns in ColumnInfo.
-    if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(column.getInternalName())) {
-      return true;
-    }
-    return false;
-  }
-
-  public void debugDisplayAllMaps(BaseWork work) {
-
-    Map<String, Integer> columnNameMap = work.getVectorColumnNameMap();
-    Map<Integer, String> columnTypeMap = work.getVectorColumnTypeMap();
-    Map<Integer, String> scratchColumnTypeMap = work.getVectorScratchColumnTypeMap();
-
-    LOG.debug("debugDisplayAllMaps columnNameMap " + columnNameMap.toString());
-    LOG.debug("debugDisplayAllMaps columnTypeMap " + columnTypeMap.toString());
-    LOG.debug("debugDisplayAllMaps scratchColumnTypeMap " + scratchColumnTypeMap.toString());
-  }
-}
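For orientation before the plan-side changes below: everything the Vectorizer resolver does is gated on hive.vectorization.enabled, with the reduce-side GROUP BY path additionally gated in validateReduceWorkOperator. A minimal sketch of flipping those switches programmatically, using only ConfVars names that appear in the code above:

  import org.apache.hadoop.hive.conf.HiveConf;

  // Minimal sketch: the configuration switches consulted by the resolver above.
  public class VectorizationConfExample {
    public static void main(String[] args) {
      HiveConf conf = new HiveConf();
      conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
      conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED, true);
      System.out.println(
          HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED));
    }
  }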
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
index ed896e4..727f842 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
@@ -28,8 +28,10 @@
 import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 
 /**
@@ -61,9 +63,7 @@ public BaseWork(String name) {
 
   // Vectorization.
-  protected Map<String, Integer> vectorColumnNameMap;
-  protected Map<Integer, String> vectorColumnTypeMap;
-  protected Map<Integer, String> vectorScratchColumnTypeMap;
+  protected VectorizedRowBatchCtx vectorizedRowBatchCtx;
 
   public void setGatheringStats(boolean gatherStats) {
     this.gatheringStats = gatherStats;
@@ -145,29 +145,17 @@ public void addDummyOp(HashTableDummyOperator dummyOp) {
     return returnSet;
   }
 
-  public Map<String, Integer> getVectorColumnNameMap() {
-    return vectorColumnNameMap;
-  }
-
-  public void setVectorColumnNameMap(Map<String, Integer> vectorColumnNameMap) {
-    this.vectorColumnNameMap = vectorColumnNameMap;
-  }
+  // -----------------------------------------------------------------------------------------------
 
-  public Map<Integer, String> getVectorColumnTypeMap() {
-    return vectorColumnTypeMap;
+  public VectorizedRowBatchCtx getVectorizedRowBatchCtx() {
+    return vectorizedRowBatchCtx;
   }
 
-  public void setVectorColumnTypeMap(Map<Integer, String> vectorColumnTypeMap) {
-    this.vectorColumnTypeMap = vectorColumnTypeMap;
+  public void setVectorizedRowBatchCtx(VectorizedRowBatchCtx vectorizedRowBatchCtx) {
+    this.vectorizedRowBatchCtx = vectorizedRowBatchCtx;
   }
 
-  public Map<Integer, String> getVectorScratchColumnTypeMap() {
-    return vectorScratchColumnTypeMap;
-  }
-
-  public void setVectorScratchColumnTypeMap(Map<Integer, String> vectorScratchColumnTypeMap) {
-    this.vectorScratchColumnTypeMap = vectorScratchColumnTypeMap;
-  }
+  // -----------------------------------------------------------------------------------------------
 
   /**
    * @return the mapredLocalWork
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index 864301c..b032349 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -68,11 +68,13 @@
 
   private String baseFileName;
 
+  private VectorPartitionDesc vectorPartitionDesc;
+
   public void setBaseFileName(String baseFileName) {
     this.baseFileName = baseFileName;
   }
 
-  public PartitionDesc() {
+  public PartitionDesc() {
   }
 
   public PartitionDesc(final TableDesc table, final LinkedHashMap<String, String> partSpec) {
@@ -271,6 +273,9 @@ public PartitionDesc clone() {
       ret.partSpec = new java.util.LinkedHashMap<String, String>();
       ret.partSpec.putAll(partSpec);
     }
+    if (vectorPartitionDesc != null) {
+      ret.vectorPartitionDesc = vectorPartitionDesc.clone();
+    }
     return ret;
   }
 
@@ -300,4 +305,12 @@ public void deriveBaseFileName(String path) {
   public void intern(Interner<TableDesc> interner) {
     this.tableDesc = interner.intern(tableDesc);
   }
+
+  public void setVectorPartitionDesc(VectorPartitionDesc vectorPartitionDesc) {
+    this.vectorPartitionDesc = vectorPartitionDesc;
+  }
+
+  public VectorPartitionDesc getVectorPartitionDesc() {
+    return vectorPartitionDesc;
+  }
 }
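A usage sketch of the new PartitionDesc plumbing (hypothetical driver code, not part of the patch), composed only from methods the two hunks above add; it shows that the per-partition vector read description survives a clone():

  import org.apache.hadoop.hive.ql.plan.PartitionDesc;
  import org.apache.hadoop.hive.ql.plan.VectorPartitionDesc;

  // Sketch of the intended wiring: a PartitionDesc carries an optional
  // VectorPartitionDesc describing how the vectorized map side reads it.
  public class VectorPartitionDescWiring {
    public static void tag(PartitionDesc partDesc) {
      VectorPartitionDesc vpd = VectorPartitionDesc.createVectorizedInputFileFormat();
      partDesc.setVectorPartitionDesc(vpd);
      // clone() carries the tag along with the rest of the partition description.
      PartitionDesc copy = partDesc.clone();
      assert copy.getVectorPartitionDesc() != null;
    }
  }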
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java
new file mode 100644
index 0000000..8fe298d
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionConversion.java
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * VectorPartitionConversion.
+ *
+ */
+public class VectorPartitionConversion {
+
+  private static long serialVersionUID = 1L;
+
+  private boolean validConversion;
+  private boolean[] resultConversionFlags;
+
+  private TypeInfo invalidFromTypeInfo;
+  private TypeInfo invalidToTypeInfo;
+
+  public boolean getValidConversion() {
+    return validConversion;
+  }
+
+  public boolean[] getResultConversionFlags() {
+    return resultConversionFlags;
+  }
+
+  public TypeInfo getInvalidFromTypeInfo() {
+    return invalidFromTypeInfo;
+  }
+
+  public TypeInfo getInvalidToTypeInfo() {
+    return invalidToTypeInfo;
+  }
+
+  // Currently, we only support these no-precision-loss or promotion data type conversions:
+  //
+  //    Short -> Int        IMPLICIT WITH VECTORIZATION
+  //    Short -> BigInt     IMPLICIT WITH VECTORIZATION
+  //    Int   -> BigInt     IMPLICIT WITH VECTORIZATION
+  //
+  // CONSIDER ADDING:
+  //    Float -> Double               IMPLICIT WITH VECTORIZATION
+  //    (Char | VarChar) -> String    IMPLICIT WITH VECTORIZATION
+  //
+  private static HashMap<PrimitiveCategory, PrimitiveCategory[]> validFromPrimitiveMap =
+      new HashMap<PrimitiveCategory, PrimitiveCategory[]>();
+  static {
+    validFromPrimitiveMap.put(
+        PrimitiveCategory.SHORT,
+        new PrimitiveCategory[] { PrimitiveCategory.INT, PrimitiveCategory.LONG });
+    validFromPrimitiveMap.put(
+        PrimitiveCategory.INT,
+        new PrimitiveCategory[] { PrimitiveCategory.LONG });
+  }
+
+  private boolean validateOne(TypeInfo fromTypeInfo, TypeInfo toTypeInfo) {
+
+    if (fromTypeInfo.equals(toTypeInfo)) {
+      return false;
+    }
+
+    if (fromTypeInfo.getCategory() == Category.PRIMITIVE &&
+        toTypeInfo.getCategory() == Category.PRIMITIVE) {
+
+      PrimitiveCategory fromPrimitiveCategory = ((PrimitiveTypeInfo) fromTypeInfo).getPrimitiveCategory();
+      PrimitiveCategory toPrimitiveCategory = ((PrimitiveTypeInfo) toTypeInfo).getPrimitiveCategory();
+
+      PrimitiveCategory[] toPrimitiveCategories =
+          validFromPrimitiveMap.get(fromPrimitiveCategory);
+      if (toPrimitiveCategories == null ||
+          !ArrayUtils.contains(toPrimitiveCategories, toPrimitiveCategory)) {
+        invalidFromTypeInfo = fromTypeInfo;
+        invalidToTypeInfo = toTypeInfo;
+
+        // Tell caller a bad one was found.
+        validConversion = false;
+        return false;
+      }
+    } else {
+      // Ignore checking complex types.  Assume they will not be included in the query.
+    }
+
+    return true;
+  }
+
+  public void validateConversion(List<TypeInfo> fromTypeInfoList,
+      List<TypeInfo> toTypeInfoList) {
+
+    final int columnCount = fromTypeInfoList.size();
+    resultConversionFlags = new boolean[columnCount];
+
+    // The method validateOne will turn this off when invalid conversion is found.
+    validConversion = true;
+
+    boolean atLeastOneConversion = false;
+    for (int i = 0; i < columnCount; i++) {
+      TypeInfo fromTypeInfo = fromTypeInfoList.get(i);
+      TypeInfo toTypeInfo = toTypeInfoList.get(i);
+
+      resultConversionFlags[i] = validateOne(fromTypeInfo, toTypeInfo);
+      if (!validConversion) {
+        return;
+      }
+      if (resultConversionFlags[i]) {
+        atLeastOneConversion = true;
+      }
+    }
+
+    if (atLeastOneConversion) {
+      // Leave resultConversionFlags set.
+    } else {
+      resultConversionFlags = null;
+    }
+  }
+
+  public void validateConversion(TypeInfo[] fromTypeInfos, TypeInfo[] toTypeInfos) {
+
+    final int columnCount = fromTypeInfos.length;
+    resultConversionFlags = new boolean[columnCount];
+
+    // The method validateOne will turn this off when invalid conversion is found.
+    validConversion = true;
+
+    boolean atLeastOneConversion = false;
+    for (int i = 0; i < columnCount; i++) {
+      TypeInfo fromTypeInfo = fromTypeInfos[i];
+      TypeInfo toTypeInfo = toTypeInfos[i];
+
+      resultConversionFlags[i] = validateOne(fromTypeInfo, toTypeInfo);
+      if (!validConversion) {
+        return;
+      }
+      if (resultConversionFlags[i]) {
+        atLeastOneConversion = true;
+      }
+    }
+
+    if (atLeastOneConversion) {
+      // Leave resultConversionFlags set.
+    } else {
+      resultConversionFlags = null;
+    }
+  }
+}
\ No newline at end of file
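A usage sketch for the class above (a hypothetical driver, not part of the patch): smallint to int is one of the accepted implicit promotions, so validation passes and only that column's conversion flag is set; identical types produce no conversion at all.

  import java.util.Arrays;
  import java.util.List;

  import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion;
  import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
  import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

  // Usage sketch: smallint -> int is an accepted implicit promotion, so the
  // conversion validates and the flag for that column is set.
  public class PartitionConversionExample {
    public static void main(String[] args) {
      List<TypeInfo> from = Arrays.<TypeInfo>asList(
          TypeInfoFactory.shortTypeInfo, TypeInfoFactory.stringTypeInfo);
      List<TypeInfo> to = Arrays.<TypeInfo>asList(
          TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);

      VectorPartitionConversion conversion = new VectorPartitionConversion();
      conversion.validateConversion(from, to);

      // true; and getResultConversionFlags() is {true, false}
      System.out.println(conversion.getValidConversion());
    }
  }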
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java
new file mode 100644
index 0000000..45151f2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorPartitionDesc.java
@@ -0,0 +1,110 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * VectorPartitionDesc.
+ *
+ * Extra vector information just for the PartitionDesc.
+ *
+ */
+public class VectorPartitionDesc {
+
+  private static long serialVersionUID = 1L;
+
+  // Data Type Conversion Needed?
+  //
+  // VECTORIZED_INPUT_FILE_FORMAT:
+  //    No data type conversion check?  Assume ALTER TABLE prevented conversions that
+  //    VectorizedInputFileFormat cannot handle...
+  //
+
+  public static enum VectorMapOperatorReadType {
+    NONE,
+    VECTORIZED_INPUT_FILE_FORMAT
+  }
+
+
+  private final VectorMapOperatorReadType vectorMapOperatorReadType;
+
+  private final boolean needsDataTypeConversionCheck;
+
+  private boolean[] conversionFlags;
+
+  private TypeInfo[] typeInfos;
+
+  private VectorPartitionDesc(VectorMapOperatorReadType vectorMapOperatorReadType,
+      boolean needsDataTypeConversionCheck) {
+    this.vectorMapOperatorReadType = vectorMapOperatorReadType;
+    this.needsDataTypeConversionCheck = needsDataTypeConversionCheck;
+
+    conversionFlags = null;
+    typeInfos = null;
+  }
+
+  public static VectorPartitionDesc createVectorizedInputFileFormat() {
+    return new VectorPartitionDesc(VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT, true);
+  }
+
+
+  @Override
+  public VectorPartitionDesc clone() {
+    VectorPartitionDesc result =
+        new VectorPartitionDesc(vectorMapOperatorReadType,
+            needsDataTypeConversionCheck);
+    result.conversionFlags =
+        (conversionFlags == null ? null :
+            Arrays.copyOf(conversionFlags, conversionFlags.length));
+    result.typeInfos =
+        (typeInfos == null ? null : Arrays.copyOf(typeInfos, typeInfos.length));
+    return result;
+  }
+
+  public VectorMapOperatorReadType getVectorMapOperatorReadType() {
+    return vectorMapOperatorReadType;
+  }
+
+  public boolean getNeedsDataTypeConversionCheck() {
+    return needsDataTypeConversionCheck;
+  }
+
+  public void setConversionFlags(boolean[] conversionFlags) {
+    this.conversionFlags = conversionFlags;
+  }
+
+  public boolean[] getConversionFlags() {
+    return conversionFlags;
+  }
+
+  public TypeInfo[] getTypeInfos() {
+    return typeInfos;
+  }
+
+  public void setTypeInfos(List<TypeInfo> typeInfoList) {
+    typeInfos = typeInfoList.toArray(new TypeInfo[0]);
+  }
+
+  public int getNonPartColumnCount() {
+    return typeInfos.length;
+  }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
index 0f8712e..c076e6c 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java
@@ -24,6 +24,7 @@
 
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 import junit.framework.TestCase;
 
@@ -50,13 +51,13 @@ void examineBatch(VectorizedRowBatch batch, VectorExtractRowSameBatch vectorExtr
 
   void testVectorRowObject(int caseNum, Random r) throws HiveException {
 
-    Map<Integer, String> emptyScratchMap = new HashMap<Integer, String>();
+    String[] emptyScratchTypeNames = new String[0];
 
     RandomRowObjectSource source = new RandomRowObjectSource();
     source.init(r);
 
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
-    batchContext.init(emptyScratchMap, source.rowStructObjectInspector());
+    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
     VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
 
     VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch();
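The test hunk above (and the two below) all make the same mechanical change: scratch columns move from a Map keyed by column index to an ordered array of type names, and the row object inspector becomes the first argument to init. A sketch of the post-patch initialization pattern, assuming init keeps its HiveException contract and that the caller already has a StructObjectInspector for the row:

  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
  import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
  import org.apache.hadoop.hive.ql.metadata.HiveException;
  import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

  // Sketch of the updated initialization pattern exercised by these tests:
  // scratch columns are an ordered array of type names, not a Map.
  public class BatchCtxInitExample {
    public static VectorizedRowBatch build(StructObjectInspector rowOI)
        throws HiveException {
      VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
      batchContext.init(rowOI, new String[] {"bigint", "double"});  // scratch types
      return batchContext.createVectorizedRowBatch();
    }
  }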
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
index 23e44f0..d3dc30d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
@@ -71,6 +71,7 @@
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
 import org.apache.hadoop.io.BooleanWritable;
@@ -331,13 +332,13 @@ void serializeBatch(VectorizedRowBatch batch, VectorSerializeRow vectorSerialize
 
   void testVectorSerializeRow(int caseNum, Random r, SerializationType serializationType)
       throws HiveException, IOException, SerDeException {
 
-    Map<Integer, String> emptyScratchMap = new HashMap<Integer, String>();
+    String[] emptyScratchTypeNames = new String[0];
 
     RandomRowObjectSource source = new RandomRowObjectSource();
     source.init(r);
 
     VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
-    batchContext.init(emptyScratchMap, source.rowStructObjectInspector());
+    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
     VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
 
     VectorAssignRowSameBatch vectorAssignRow = new VectorAssignRowSameBatch();
@@ -563,13 +564,13 @@ private LazySerDeParameters getSerDeParams(StructObjectInspector rowObjectInspec
 
   void testVectorDeserializeRow(int caseNum, Random r, SerializationType serializationType)
       throws HiveException, IOException, SerDeException {
 
-    Map<Integer, String> emptyScratchMap = new HashMap<Integer, String>();
+    String[] emptyScratchTypeNames = new String[0];
 
     RandomRowObjectSource source = new RandomRowObjectSource();
     source.init(r);
 
     VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
-    batchContext.init(emptyScratchMap, source.rowStructObjectInspector());
+    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
     VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
 
     int fieldCount = source.typeNames().size();
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
deleted file mode 100644
index 473ebac..0000000
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
+++ /dev/null
@@ -1,355 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import java.io.File;
-import java.io.IOException;
-import java.sql.Timestamp;
-import java.util.Arrays;
-import java.util.Calendar;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.RCFile;
-import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeUtils;
-import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
-import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
-import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.DataOutputBuffer;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.ObjectWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.compress.DefaultCodec;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Class that tests the functionality of VectorizedRowBatchCtx.
- */
-public class TestVectorizedRowBatchCtx {
-
-  private Configuration conf;
-  private FileSystem fs;
-  private Path testFilePath;
-  private int colCount;
-  private ColumnarSerDe serDe;
-  private Properties tbl;
-
-  @Before
-  public void openFileSystem() throws Exception {
-    conf = new Configuration();
-    fs = FileSystem.getLocal(conf);
-    Path workDir = new Path(System.getProperty("test.tmp.dir",
-        "target" + File.separator + "test" + File.separator + "tmp"));
-    fs.setWorkingDirectory(workDir);
-    testFilePath = new Path("TestVectorizedRowBatchCtx.testDump.rc");
-    fs.delete(testFilePath, false);
-  }
-
-  private void initSerde() {
-    tbl = new Properties();
-
-    // Set the configuration parameters
-    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "6");
-    tbl.setProperty("columns",
-        "ashort,aint,along,adouble,afloat,astring,abyte,aboolean,atimestamp");
-    tbl.setProperty("columns.types",
-        "smallint:int:bigint:double:float:string:tinyint:boolean:timestamp");
-    colCount = 9;
-    tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
-
-    try {
-      serDe = new ColumnarSerDe();
-      SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
-    } catch (SerDeException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  private void WriteRCFile(FileSystem fs, Path file, Configuration conf)
-      throws IOException, SerDeException {
-    fs.delete(file, true);
-
-    RCFileOutputFormat.setColumnNumber(conf, colCount);
-    RCFile.Writer writer =
-        new RCFile.Writer(fs, conf, file, null, null,
-            new DefaultCodec());
-
-    for (int i = 0; i < 10; ++i) {
-      BytesRefArrayWritable bytes = new BytesRefArrayWritable(colCount);
-      BytesRefWritable cu;
-
-      if (i % 3 != 0) {
-        //if (i < 100) {
-        cu = new BytesRefWritable((i + "").getBytes("UTF-8"), 0, (i + "").getBytes("UTF-8").length);
-        bytes.set(0, cu);
-
-        cu = new BytesRefWritable((i + 100 + "").getBytes("UTF-8"), 0,
-            (i + 100 + "").getBytes("UTF-8").length);
-        bytes.set(1, cu);
-
-        cu = new BytesRefWritable((i + 200 + "").getBytes("UTF-8"), 0,
-            (i + 200 + "").getBytes("UTF-8").length);
-        bytes.set(2, cu);
-
-        cu = new BytesRefWritable((i + 1.23 + "").getBytes("UTF-8"), 0,
-            (i + 1.23 + "").getBytes("UTF-8").length);
-        bytes.set(3, cu);
-
-        cu = new BytesRefWritable((i + 2.23 + "").getBytes("UTF-8"), 0,
-            (i + 2.23 + "").getBytes("UTF-8").length);
-        bytes.set(4, cu);
-
-        cu = new BytesRefWritable(("Test string").getBytes("UTF-8"), 0,
-            ("Test string").getBytes("UTF-8").length);
-        bytes.set(5, cu);
-
-        cu = new BytesRefWritable((1 + "").getBytes("UTF-8"), 0,
-            (1 + "").getBytes("UTF-8").length);
-        bytes.set(6, cu);
-
-        cu = new BytesRefWritable(("true").getBytes("UTF-8"), 0,
-            ("true").getBytes("UTF-8").length);
-        bytes.set(7, cu);
-
-        Timestamp t = new Timestamp(Calendar.getInstance().getTime().getTime());
-        cu = new BytesRefWritable(t.toString().getBytes("UTF-8"), 0,
-            t.toString().getBytes("UTF-8").length);
-        bytes.set(8, cu);
-
-      } else {
-        cu = new BytesRefWritable((i + "").getBytes("UTF-8"), 0, (i + "").getBytes("UTF-8").length);
-        bytes.set(0, cu);
-
-        cu = new BytesRefWritable(new byte[0], 0, 0);
-        bytes.set(1, cu);
-
-        cu = new BytesRefWritable(new byte[0], 0, 0);
-        bytes.set(2, cu);
-
-        cu = new BytesRefWritable(new byte[0], 0, 0);
-        bytes.set(3, cu);
-
-        cu = new BytesRefWritable(new byte[0], 0, 0);
-        bytes.set(4, cu);
-
-        cu = new BytesRefWritable(("Test string").getBytes("UTF-8"), 0,
-            ("Test string").getBytes("UTF-8").length);
-        bytes.set(5, cu);
-
-        cu = new BytesRefWritable(new byte[0], 0, 0);
-        bytes.set(6, cu);
-
-        cu = new BytesRefWritable(new byte[0], 0, 0);
-        bytes.set(7, cu);
-
-//        cu = new BytesRefWritable(new byte[0], 0, 0);
-//        bytes.set(8, cu);
-        Timestamp t = new Timestamp(Calendar.getInstance().getTime().getTime());
-        cu = new BytesRefWritable(t.toString().getBytes("UTF-8"), 0,
-            t.toString().getBytes("UTF-8").length);
-        bytes.set(8, cu);
-      }
-      writer.append(bytes);
-    }
-    writer.close();
-  }
-
-  private VectorizedRowBatch GetRowBatch() throws SerDeException, HiveException, IOException {
-
-    RCFile.Reader reader = new RCFile.Reader(fs, this.testFilePath, conf);
-    DataOutputBuffer buffer = new DataOutputBuffer();
-
-    // Get object inspector
-    StructObjectInspector oi = (StructObjectInspector) serDe
-        .getObjectInspector();
-    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
-
-    Assert.assertEquals("Field size should be 9", colCount, fieldRefs.size());
-
-    // Create the context
-    VectorizedRowBatchCtx ctx = new VectorizedRowBatchCtx(oi, oi, serDe, null, null);
-    VectorizedRowBatch batch = ctx.createVectorizedRowBatch();
-    VectorizedBatchUtil.setNoNullFields(batch);
-
-    // Iterate thru the rows and populate the batch
-    LongWritable rowID = new LongWritable();
-    for (int i = 0; i < 10; i++) {
-      reader.next(rowID);
-      BytesRefArrayWritable cols = new BytesRefArrayWritable();
-      reader.getCurrentRow(cols);
-      cols.resetValid(colCount);
-      ctx.addRowToBatch(i, cols, batch, buffer);
-    }
-    reader.close();
-    batch.size = 10;
-    return batch;
-  }
-
-  void ValidateRowBatch(VectorizedRowBatch batch) throws IOException, SerDeException {
-
-    LongWritable rowID = new LongWritable();
-    RCFile.Reader reader = new RCFile.Reader(fs, this.testFilePath, conf);
-    for (int i = 0; i < batch.size; i++) {
-      reader.next(rowID);
-      BytesRefArrayWritable cols = new BytesRefArrayWritable();
-      reader.getCurrentRow(cols);
-      cols.resetValid(colCount);
-      Object row = serDe.deserialize(cols);
-
-      StructObjectInspector oi = (StructObjectInspector) serDe
-          .getObjectInspector();
-      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
-
-      for (int j = 0; j < fieldRefs.size(); j++) {
-        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
-        ObjectInspector foi = fieldRefs.get(j).getFieldObjectInspector();
-
-        // Vectorization only supports PRIMITIVE data types. Assert the same
-        Assert.assertEquals(true, foi.getCategory() == Category.PRIMITIVE);
-
-        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
-        Object writableCol = poi.getPrimitiveWritableObject(fieldData);
-        if (writableCol != null) {
-          switch (poi.getPrimitiveCategory()) {
-            case BOOLEAN: {
-              LongColumnVector lcv = (LongColumnVector) batch.cols[j];
-              Assert.assertEquals(true, lcv.vector[i] == (((BooleanWritable) writableCol).get() ? 1 : 0));
1 - : 0)); - } - break; - case BYTE: { - LongColumnVector lcv = (LongColumnVector) batch.cols[j]; - Assert.assertEquals(true, lcv.vector[i] == (long) ((ByteWritable) writableCol).get()); - } - break; - case SHORT: { - LongColumnVector lcv = (LongColumnVector) batch.cols[j]; - Assert.assertEquals(true, lcv.vector[i] == ((ShortWritable) writableCol).get()); - } - break; - case INT: { - LongColumnVector lcv = (LongColumnVector) batch.cols[j]; - Assert.assertEquals(true, lcv.vector[i] == ((IntWritable) writableCol).get()); - } - break; - case LONG: { - LongColumnVector lcv = (LongColumnVector) batch.cols[j]; - Assert.assertEquals(true, lcv.vector[i] == ((LongWritable) writableCol).get()); - } - break; - case FLOAT: { - DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[j]; - Assert.assertEquals(true, dcv.vector[i] == ((FloatWritable) writableCol).get()); - } - break; - case DOUBLE: { - DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[j]; - Assert.assertEquals(true, dcv.vector[i] == ((DoubleWritable) writableCol).get()); - } - break; - case BINARY: { - BytesColumnVector bcv = (BytesColumnVector) batch.cols[j]; - BytesWritable colBinary = (BytesWritable) writableCol; - BytesWritable batchBinary = (BytesWritable) bcv.getWritableObject(i); - byte[] a = colBinary.getBytes(); - byte[] b = batchBinary.getBytes(); - Assert.assertEquals(true, Arrays.equals(a, b)); - } - break; - case STRING: { - BytesColumnVector bcv = (BytesColumnVector) batch.cols[j]; - Text colText = (Text) writableCol; - Text batchText = (Text) bcv.getWritableObject(i); - String a = colText.toString(); - String b = batchText.toString(); - Assert.assertEquals(true, a.equals(b)); - } - break; - case TIMESTAMP: { - LongColumnVector tcv = (LongColumnVector) batch.cols[j]; - Timestamp t = ((TimestampWritable) writableCol).getTimestamp(); - long timeInNanoSec = (t.getTime() * 1000000) + (t.getNanos() % 1000000); - Assert.assertEquals(true, tcv.vector[i] == timeInNanoSec); - } - break; - default: - Assert.assertTrue("Unknown type", false); - } - } else { - Assert.assertEquals(true, batch.cols[j].isNull[i]); - } - } - - // Check repeating - Assert.assertEquals(false, batch.cols[0].isRepeating); - Assert.assertEquals(false, batch.cols[1].isRepeating); - Assert.assertEquals(false, batch.cols[2].isRepeating); - Assert.assertEquals(false, batch.cols[3].isRepeating); - Assert.assertEquals(false, batch.cols[4].isRepeating); - - // Check non null - Assert.assertEquals(true, batch.cols[0].noNulls); - Assert.assertEquals(false, batch.cols[1].noNulls); - Assert.assertEquals(false, batch.cols[2].noNulls); - Assert.assertEquals(false, batch.cols[3].noNulls); - Assert.assertEquals(false, batch.cols[4].noNulls); - } - reader.close(); - } - - @Test - public void TestCtx() throws Exception { - initSerde(); - WriteRCFile(this.fs, this.testFilePath, this.conf); - VectorizedRowBatch batch = GetRowBatch(); - ValidateRowBatch(batch); - - // Test VectorizedColumnarSerDe - VectorizedColumnarSerDe vcs = new VectorizedColumnarSerDe(); - SerDeUtils.initializeSerDe(vcs, this.conf, tbl, null); - Writable w = vcs.serializeVector(batch, (StructObjectInspector) serDe - .getObjectInspector()); - BytesRefArrayWritable[] refArray = (BytesRefArrayWritable[]) ((ObjectWritable) w).get(); - vcs.deserializeVector(refArray, 10, batch); - ValidateRowBatch(batch); - } -} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index b9d6c27..b9eec92 100644 --- 
ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; @@ -66,9 +67,11 @@ import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.ql.io.InputFormatChecker; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.SplitStrategy; +import org.apache.hadoop.hive.ql.io.orc.TestOrcRawRecordMerger.MyRow; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; @@ -189,6 +192,7 @@ public String toString() { builder.append("}"); return builder.toString(); } + } public static class BigRowField implements StructField { @@ -331,6 +335,7 @@ public String getTypeName() { public Category getCategory() { return Category.STRUCT; } + } public static class MyRow implements Writable { @@ -350,6 +355,15 @@ public void write(DataOutput dataOutput) throws IOException { public void readFields(DataInput dataInput) throws IOException { throw new UnsupportedOperationException("no read"); } + + + static String getColumnNamesProperty() { + return "x,y"; + } + static String getColumnTypesProperty() { + return "int:int"; + } + } @Rule @@ -1130,6 +1144,8 @@ public void testInOutFormat() throws Exception { // read the whole file + conf.set("columns", MyRow.getColumnNamesProperty()); + conf.set("columns.types", MyRow.getColumnTypesProperty()); org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL); Object key = reader.createKey(); @@ -1250,6 +1266,8 @@ public void testMROutput() throws Exception { InputSplit[] splits = in.getSplits(conf, 1); assertEquals(1, splits.length); ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1)); + conf.set("columns", "z,r"); + conf.set("columns.types", "int:struct"); org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL); Object key = reader.createKey(); @@ -1330,6 +1348,14 @@ public void write(DataOutput dataOutput) throws IOException { public void readFields(DataInput dataInput) throws IOException { throw new UnsupportedOperationException("no read"); } + + static String getColumnNamesProperty() { + return "str,str2"; + } + static String getColumnTypesProperty() { + return "string:string"; + } + } @Test @@ -1365,6 +1391,8 @@ public void testDefaultTypes() throws Exception { assertEquals(1, splits.length); // read the whole file + conf.set("columns", StringRow.getColumnNamesProperty()); + conf.set("columns.types", StringRow.getColumnTypesProperty()); org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL); Object key = reader.createKey(); @@ -1405,6 +1433,7 @@ public void testDefaultTypes() throws Exception { * @param isVectorized should run vectorized * @return a 
JobConf that contains the necessary information * @throws IOException + * @throws HiveException */ JobConf createMockExecutionEnvironment(Path workDir, Path warehouseDir, @@ -1412,9 +1441,9 @@ JobConf createMockExecutionEnvironment(Path workDir, ObjectInspector objectInspector, boolean isVectorized, int partitions - ) throws IOException { - Utilities.clearWorkMap(); + ) throws IOException, HiveException { JobConf conf = new JobConf(); + Utilities.clearWorkMap(); conf.set("hive.exec.plan", workDir.toString()); conf.set("mapred.job.tracker", "local"); conf.set("hive.vectorized.execution.enabled", Boolean.toString(isVectorized)); @@ -1467,6 +1496,11 @@ JobConf createMockExecutionEnvironment(Path workDir, MapWork mapWork = new MapWork(); mapWork.setVectorMode(isVectorized); + if (isVectorized) { + VectorizedRowBatchCtx vectorizedRowBatchCtx = new VectorizedRowBatchCtx(); + vectorizedRowBatchCtx.init(structOI, new String[0]); + mapWork.setVectorizedRowBatchCtx(vectorizedRowBatchCtx); + } mapWork.setUseBucketizedHiveInputFormat(false); LinkedHashMap> aliasMap = new LinkedHashMap>(); @@ -1529,6 +1563,8 @@ public void testVectorization() throws Exception { InputSplit[] splits = inputFormat.getSplits(conf, 10); assertEquals(1, splits.length); + conf.set("columns", MyRow.getColumnNamesProperty()); + conf.set("columns.types", MyRow.getColumnTypesProperty()); org.apache.hadoop.mapred.RecordReader reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL); NullWritable key = reader.createKey(); @@ -1578,6 +1614,8 @@ public void testVectorizationWithBuckets() throws Exception { InputSplit[] splits = inputFormat.getSplits(conf, 10); assertEquals(1, splits.length); + conf.set("columns", MyRow.getColumnNamesProperty()); + conf.set("columns.types", MyRow.getColumnTypesProperty()); org.apache.hadoop.mapred.RecordReader reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL); NullWritable key = reader.createKey(); @@ -1646,8 +1684,11 @@ public void testVectorizationWithAcid() throws Exception { assertEquals("checking long " + i, i, longColumn.vector[i]); assertEquals("checking float " + i, i, floatColumn.vector[i], 0.0001); assertEquals("checking double " + i, i, doubleCoulmn.vector[i], 0.0001); + Text strValue = new Text(); + strValue.set(stringColumn.vector[i], stringColumn.start[i], + stringColumn.length[i]); assertEquals("checking string " + i, new Text(Long.toHexString(i)), - stringColumn.getWritableObject(i)); + strValue); assertEquals("checking decimal " + i, HiveDecimal.create(i), decimalColumn.vector[i].getHiveDecimal()); assertEquals("checking date " + i, i, dateColumn.vector[i]); @@ -1718,6 +1759,8 @@ public void testCombinationInputFormat() throws Exception { assertTrue(3 >= split.getLocations().length); // read split + conf.set("columns", MyRow.getColumnNamesProperty()); + conf.set("columns.types", MyRow.getColumnTypesProperty()); org.apache.hadoop.mapred.RecordReader reader = inputFormat.getRecordReader(split, conf, Reporter.NULL); CombineHiveKey key = reader.createKey(); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java index 39f71f1..bfdc83f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcRawRecordMerger.java @@ -27,13 +27,16 @@ import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.ql.io.AcidOutputFormat; import 
org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.ql.io.RecordUpdater; import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.OriginalReaderPair; import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey; import org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderPair; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.IntWritable; @@ -48,6 +51,8 @@ import org.mockito.MockSettings; import org.mockito.Mockito; +import com.google.common.collect.Lists; + import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; @@ -354,6 +359,8 @@ public void testNewBase() throws Exception { Configuration conf = new Configuration(); conf.set("columns", "col1"); conf.set("columns.types", "string"); + conf.set(serdeConstants.LIST_COLUMNS, "col1"); + conf.set(serdeConstants.LIST_COLUMN_TYPES, "string"); Reader reader = Mockito.mock(Reader.class, settings); RecordReader recordReader = Mockito.mock(RecordReader.class, settings); @@ -362,6 +369,8 @@ public void testNewBase() throws Exception { typeBuilder.setKind(OrcProto.Type.Kind.STRUCT).addSubtypes(1) .addSubtypes(2).addSubtypes(3).addSubtypes(4).addSubtypes(5) .addSubtypes(6); + typeBuilder.addAllFieldNames(Lists.newArrayList("operation", "originalTransaction", "bucket", + "rowId", "currentTransaction", "row")); types.add(typeBuilder.build()); types.add(null); types.add(null); @@ -370,6 +379,10 @@ public void testNewBase() throws Exception { types.add(null); typeBuilder.clearSubtypes(); typeBuilder.addSubtypes(7); + typeBuilder.addAllFieldNames(Lists.newArrayList("col1")); + types.add(typeBuilder.build()); + typeBuilder.clear(); + typeBuilder.setKind(OrcProto.Type.Kind.STRING); types.add(typeBuilder.build()); Mockito.when(reader.getTypes()).thenReturn(types); @@ -466,6 +479,14 @@ public void testNewBase() throws Exception { col1 = new Text(val); ROW__ID = new RecordIdentifier(origTxn, bucket, rowId); } + + static String getColumnNamesProperty() { + return "col1,ROW__ID"; + } + static String getColumnTypesProperty() { + return "string:struct"; + } + } static String getValue(OrcStruct event) { @@ -499,6 +520,8 @@ public void testEmpty() throws Exception { BUCKET); Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf)); + conf.set("columns", MyRow.getColumnNamesProperty()); + conf.set("columns.types", MyRow.getColumnTypesProperty()); OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), @@ -567,6 +590,10 @@ private void testNewBaseAndDelta(boolean use130Format) throws Exception { Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET); + + conf.set("columns", MyRow.getColumnNamesProperty()); + conf.set("columns.types", MyRow.getColumnTypesProperty()); + Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf)); OrcRawRecordMerger merger = @@ -790,6 +817,13 @@ private void testNewBaseAndDelta(boolean use130Format) throws Exception { BigRow(long rowId, long origTxn, 
int bucket) {
       ROW__ID = new RecordIdentifier(origTxn, bucket, rowId);
     }
+
+    static String getColumnNamesProperty() {
+      return "myint,mylong,mytext,myfloat,mydouble,ROW__ID";
+    }
+    static String getColumnTypesProperty() {
+      return "int:bigint:string:float:double:struct";
+    }
   }
 
   /**
@@ -863,6 +897,8 @@ synchronized void addedRow() throws IOException {
     InputFormat inf = new OrcInputFormat();
     JobConf job = new JobConf();
+    job.set("columns", BigRow.getColumnNamesProperty());
+    job.set("columns.types", BigRow.getColumnTypesProperty());
     job.set("mapred.min.split.size", "1");
     job.set("mapred.max.split.size", "2");
     job.set("mapred.input.dir", root.toString());
@@ -967,6 +1003,8 @@ synchronized void addedRow() throws IOException {
     job.set("mapred.min.split.size", "1");
     job.set("mapred.max.split.size", "2");
     job.set("mapred.input.dir", root.toString());
+    job.set("columns", BigRow.getColumnNamesProperty());
+    job.set("columns.types", BigRow.getColumnTypesProperty());
     InputSplit[] splits = inf.getSplits(job, 5);
     assertEquals(5, splits.length);
     org.apache.hadoop.mapred.RecordReader rr;
@@ -1037,6 +1075,8 @@ public void testRecordReaderDelta() throws Exception {
     job.set("mapred.max.split.size", "2");
     job.set("mapred.input.dir", root.toString());
     job.set("bucket_count", "1");
+    job.set("columns", MyRow.getColumnNamesProperty());
+    job.set("columns.types", MyRow.getColumnTypesProperty());
     InputSplit[] splits = inf.getSplits(job, 5);
     assertEquals(1, splits.length);
     org.apache.hadoop.mapred.RecordReader rr;
@@ -1104,6 +1144,8 @@ private void testRecordReaderIncompleteDelta(boolean use130Format) throws Except
     JobConf job = new JobConf();
     job.set("mapred.input.dir", root.toString());
     job.set("bucket_count", "2");
+    job.set("columns", MyRow.getColumnNamesProperty());
+    job.set("columns.types", MyRow.getColumnTypesProperty());
 
     // read the keys before the delta is flushed
     InputSplit[] splits = inf.getSplits(job, 1);
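Editor's note: the hunks above, and the matching ones in TestInputOutputFormat, all apply the same contract: with schema evolution, the reader schema must be published on the JobConf via "columns" and "columns.types" before OrcInputFormat is asked for splits or record readers. The following is a minimal, self-contained sketch of that contract, not part of the patch; the input path and class name are hypothetical, and the schema literals are borrowed from the MyRow helpers ("x,y" / "int:int") added earlier in the diff.

package org.apache.hadoop.hive.ql.io.orc;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class SchemaOnJobConfSketch {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    job.set("mapred.input.dir", "/tmp/orc-table");  // hypothetical table location
    // Reader schema: comma-separated names, colon-separated types; the same
    // convention the getColumnNamesProperty()/getColumnTypesProperty() helpers
    // in the test hunks return.
    job.set("columns", "x,y");
    job.set("columns.types", "int:int");

    InputFormat<NullWritable, OrcStruct> in = new OrcInputFormat();
    InputSplit[] splits = in.getSplits(job, 1);
    RecordReader<NullWritable, OrcStruct> reader =
        in.getRecordReader(splits[0], job, Reporter.NULL);
    NullWritable key = reader.createKey();
    OrcStruct value = reader.createValue();
    while (reader.next(key, value)) {
      // Each row has been resolved against the reader schema named above.
      System.out.println(value);
    }
    reader.close();
  }
}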
+        AcidUtils.deltaSubdir(10,19,0) :
+        AcidUtils.deltaSubdir(10,19)) + "/bucket_00001_flush_length");
+    assertEquals(true, fs.exists(sideFile));
+    assertEquals(24, fs.getFileStatus(sideFile).getLen());
+
+    for(int i=1; i < 11; ++i) {
+      assertEquals(true, rr.next(key, value));
+      assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
+    }
+    assertEquals(false, rr.next(key, value));
+  }
+
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
index e72e5cf..3b35d07 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
@@ -18,11 +18,13 @@
 package org.apache.hadoop.hive.ql.io.orc;
 
+import static org.junit.Assert.assertEquals;
 import java.io.File;
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.Calendar;
 import java.util.Random;
+import org.apache.hadoop.io.Text;
 
 import junit.framework.Assert;
 
@@ -30,15 +32,23 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -149,42 +159,61 @@ private void checkVectorizedReader() throws Exception {
       row = (OrcStruct) rr.next(row);
       for (int j = 0; j < batch.cols.length; j++) {
         Object a = (row.getFieldValue(j));
-        Object b = batch.cols[j].getWritableObject(i);
-        // Boolean values are stores a 1's and 0's, so convert and compare
-        if (a instanceof BooleanWritable) {
+        ColumnVector cv = batch.cols[j];
+        // if the value is repeating, use row 0
+        int rowId = cv.isRepeating ? 0 : i;
+
+        // make sure the null flag agrees
+        if (a == null) {
+          Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
+        } else if (a instanceof BooleanWritable) {
+
+          // Boolean values are stored as 1's and 0's, so convert and compare
          Long temp = (long) (((BooleanWritable) a).get() ?
1 : 0); - Assert.assertEquals(true, temp.toString().equals(b.toString())); - continue; - } - // Timestamps are stored as long, so convert and compare - if (a instanceof TimestampWritable) { + long b = ((LongColumnVector) cv).vector[rowId]; + Assert.assertEquals(temp.toString(), Long.toString(b)); + } else if (a instanceof TimestampWritable) { + // Timestamps are stored as long, so convert and compare TimestampWritable t = ((TimestampWritable) a); // Timestamp.getTime() is overriden and is // long time = super.getTime(); // return (time + (nanos / 1000000)); Long timeInNanoSec = (t.getTimestamp().getTime() * 1000000) + (t.getTimestamp().getNanos() % 1000000); - Assert.assertEquals(true, timeInNanoSec.toString().equals(b.toString())); - continue; - } + long b = ((LongColumnVector) cv).vector[rowId]; + Assert.assertEquals(timeInNanoSec.toString(), Long.toString(b)); + + } else if (a instanceof DateWritable) { + // Dates are stored as long, so convert and compare - // Dates are stored as long, so convert and compare - if (a instanceof DateWritable) { DateWritable adt = (DateWritable) a; - Assert.assertEquals(adt.get().getTime(), DateWritable.daysToMillis((int) ((LongWritable) b).get())); - continue; - } + long b = ((LongColumnVector) cv).vector[rowId]; + Assert.assertEquals(adt.get().getTime(), + DateWritable.daysToMillis((int) b)); - // Decimals are stored as BigInteger, so convert and compare - if (a instanceof HiveDecimalWritable) { + } else if (a instanceof HiveDecimalWritable) { + // Decimals are stored as BigInteger, so convert and compare HiveDecimalWritable dec = (HiveDecimalWritable) a; + HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[i]; Assert.assertEquals(dec, b); - } - if (null == a) { - Assert.assertEquals(true, (b == null || (b instanceof NullWritable))); + } else if (a instanceof DoubleWritable) { + + double b = ((DoubleColumnVector) cv).vector[rowId]; + assertEquals(a.toString(), Double.toString(b)); + } else if (a instanceof Text) { + BytesColumnVector bcv = (BytesColumnVector) cv; + Text b = new Text(); + b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]); + assertEquals(a, b); + } else if (a instanceof IntWritable || + a instanceof LongWritable || + a instanceof ByteWritable || + a instanceof ShortWritable) { + assertEquals(a.toString(), + Long.toString(((LongColumnVector) cv).vector[rowId])); } else { - Assert.assertEquals(true, b.toString().equals(a.toString())); + assertEquals("huh", a.getClass().getName()); } } } diff --git ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_part.q new file mode 100644 index 0000000..480c345 --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_part.q @@ -0,0 +1,173 @@ +set hive.cli.print.header=true; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; +SET hive.exec.schema.evolution=false; +SET hive.vectorized.execution.enabled=false; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string);
+
+insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1;
+select part,a,b,c from partitioned1;
+select part,a,b,c,d from partitioned1;
+select part,a,c,d from partitioned1;
+select part,a,d from partitioned1;
+select part,c from partitioned1;
+select part,d from partitioned1;
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned2 change column a a int;
+
+insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new');
+
+select part,a,b from partitioned2;
+
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string);
+
+insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+    (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1);
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned3;
+select part,a,b,c from partitioned3;
+select part,a,b,c,d from partitioned3;
+select part,a,c,d from partitioned3;
+select part,a,d from partitioned3;
+select part,c from partitioned3;
+select part,d from partitioned3;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
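+-- NOTE: the CHANGE COLUMN below is a metadata-only widening; the delta files
+-- already written for partitioned4 still store smallint values and are
+-- converted to int as they are read. A minimal non-ACID sketch of the same
+-- idea (hypothetical table name, not executed by this test):
+--   CREATE TABLE widen_demo(a smallint) STORED AS ORC;
+--   INSERT INTO widen_demo VALUES (32767);
+--   ALTER TABLE widen_demo CHANGE COLUMN a a int;
+--   SELECT a FROM widen_demo;  -- old file read back through the int schema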
+alter table partitioned4 change column a a int; + +insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1); + +select part,a,b from partitioned4; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned5 add columns(c int, d string); + +insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select part,a,b,c,d from partitioned5; + +-- UPDATE New Columns +update partitioned5 set c=99; + +select part,a,b,c,d from partitioned5; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned6 add columns(c int, d string); + +insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select part,a,b,c,d from partitioned6; + +-- DELETE where old column +delete from partitioned6 where a = 2 or a = 4 or a = 6; + +select part,a,b,c,d from partitioned6; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
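+-- NOTE: in the DELETE at the end of this section, predicates on the new
+-- column c can only match rows written after the ALTER; older rows read c as
+-- NULL, and 'c = 30' is never true for NULL. An illustrative (not executed)
+-- way to select exactly the pre-ALTER rows would be:
+--   select part,a,b from partitioned7 where c is null;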
+alter table partitioned7 add columns(c int, d string); + +insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select part,a,b,c,d from partitioned7; + +-- DELETE where new column +delete from partitioned7 where a = 1 or c = 30 or c == 100; + +select part,a,b,c,d from partitioned7; + + +DROP TABLE partitioned1; +DROP TABLE partitioned2; +DROP TABLE partitioned3; +DROP TABLE partitioned4; +DROP TABLE partitioned5; +DROP TABLE partitioned6; +DROP TABLE partitioned7; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_table.q new file mode 100644 index 0000000..61ba005 --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_orc_acid_mapwork_table.q @@ -0,0 +1,131 @@ +set hive.cli.print.header=true; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; +SET hive.exec.schema.evolution=false; +SET hive.vectorized.execution.enabled=false; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string); + +insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; + +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table table2 values(5000, 'new'),(90000, 'new'); + +select a,b from table2; + + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
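+-- NOTE: every table in this file is bucketed ORC with 'transactional'='true'
+-- under DbTxnManager, the preconditions for ACID UPDATE/DELETE here. The
+-- UPDATE later in this section rewrites the affected rows into a new delta
+-- under the current schema, which is why even rows inserted before the ALTER
+-- come back with c=99.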
+alter table table5 add columns(c int, d string); + +insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select a,b,c,d from table5; + +-- UPDATE New Columns +update table5 set c=99; + +select a,b,c,d from table5; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table6 add columns(c int, d string); + +insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select a,b,c,d from table6; + +-- DELETE where old column +delete from table6 where a = 2 or a = 4 or a = 6; + +select a,b,c,d from table6; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table7 add columns(c int, d string); + +insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select a,b,c,d from table7; + +-- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100; + +select a,b,c,d from table7; + + +DROP TABLE table1; +DROP TABLE table2; +DROP TABLE table5; +DROP TABLE table6; +DROP TABLE table7; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q new file mode 100644 index 0000000..8f1d369 --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_part.q @@ -0,0 +1,173 @@ +set hive.cli.print.header=true; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; +SET hive.exec.schema.evolution=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Partitioned +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
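+-- NOTE: this file repeats the statements of the non-vectorized ACID variant;
+-- only hive.vectorized.execution.enabled differs. Any divergence in output
+-- would therefore isolate a schema evolution bug in the vectorized ORC
+-- reader rather than in the statements themselves.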
+alter table partitioned1 add columns(c int, d string);
+
+insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1;
+select part,a,b,c from partitioned1;
+select part,a,b,c,d from partitioned1;
+select part,a,c,d from partitioned1;
+select part,a,d from partitioned1;
+select part,c from partitioned1;
+select part,d from partitioned1;
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned2 change column a a int;
+
+insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new');
+
+select part,a,b from partitioned2;
+
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string);
+
+insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+    (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1);
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned3;
+select part,a,b,c from partitioned3;
+select part,a,b,c,d from partitioned3;
+select part,a,c,d from partitioned3;
+select part,a,d from partitioned3;
+select part,c from partitioned3;
+select part,d from partitioned3;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
+
+insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int;
+
+insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+    (5000, 'new', 1),(90000, 'new', 1);
+
+select part,a,b from partitioned4;
+
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ...
UPDATE New Columns +--- +CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned5 add columns(c int, d string); + +insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select part,a,b,c,d from partitioned5; + +-- UPDATE New Columns +update partitioned5 set c=99; + +select part,a,b,c,d from partitioned5; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned6 add columns(c int, d string); + +insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select part,a,b,c,d from partitioned6; + +-- DELETE where old column +delete from partitioned6 where a = 2 or a = 4 or a = 6; + +select part,a,b,c,d from partitioned6; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned7 add columns(c int, d string); + +insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select part,a,b,c,d from partitioned7; + +-- DELETE where new column +delete from partitioned7 where a = 1 or c = 30 or c == 100; + +select part,a,b,c,d from partitioned7; + + +DROP TABLE partitioned1; +DROP TABLE partitioned2; +DROP TABLE partitioned3; +DROP TABLE partitioned4; +DROP TABLE partitioned5; +DROP TABLE partitioned6; +DROP TABLE partitioned7; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q new file mode 100644 index 0000000..c901ad4 --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_orc_acidvec_mapwork_table.q @@ -0,0 +1,131 @@ +set hive.cli.print.header=true; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; +SET hive.exec.schema.evolution=false; +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Table +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string); + +insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; + +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table table2 values(5000, 'new'),(90000, 'new'); + +select a,b from table2; + + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
+alter table table5 add columns(c int, d string); + +insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select a,b,c,d from table5; + +-- UPDATE New Columns +update table5 set c=99; + +select a,b,c,d from table5; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table6 add columns(c int, d string); + +insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select a,b,c,d from table6; + +-- DELETE where old column +delete from table6 where a = 2 or a = 4 or a = 6; + +select a,b,c,d from table6; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true'); + +insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table7 add columns(c int, d string); + +insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +select a,b,c,d from table7; + +-- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100; + +select a,b,c,d from table7; + + +DROP TABLE table1; +DROP TABLE table2; +DROP TABLE table5; +DROP TABLE table6; +DROP TABLE table7; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_fetchwork_part.q ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_fetchwork_part.q new file mode 100644 index 0000000..cf42e9c --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_fetchwork_part.q @@ -0,0 +1,97 @@ +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=true; +SET hive.vectorized.execution.enabled=false; +set hive.fetch.task.conversion=more; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC; + +insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
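+-- NOTE: "Table-Non-Cascade" means the ALTER below changes only the
+-- table-level schema; the stored descriptor of the pre-existing partition
+-- part=1 keeps the old column list, so reading it relies on schema
+-- evolution. Hive's 'ALTER TABLE ... ADD COLUMNS (...) CASCADE' form would
+-- push the new schema down to existing partitions instead.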
+alter table partitioned1 add columns(c int, d string);
+
+insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1;
+select part,a,b,c from partitioned1;
+select part,a,b,c,d from partitioned1;
+select part,a,c,d from partitioned1;
+select part,a,d from partitioned1;
+select part,c from partitioned1;
+select part,d from partitioned1;
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned2 change column a a int;
+
+insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new');
+
+select part,a,b from partitioned2;
+
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string);
+
+insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+    (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1);
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned3;
+select part,a,b,c from partitioned3;
+select part,a,b,c,d from partitioned3;
+select part,a,c,d from partitioned3;
+select part,a,d from partitioned3;
+select part,c from partitioned3;
+select part,d from partitioned3;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
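+-- NOTE: the partition(part) inserts in this file are dynamic: the trailing
+-- value of each row supplies the partition key, which is why
+-- hive.exec.dynamic.partition.mode=nonstrict is set at the top. For example,
+-- in (72909, 'new', 2) below, the final 2 routes the row to part=2.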
+alter table partitioned4 change column a a int;
+
+insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+    (5000, 'new', 1),(90000, 'new', 1);
+
+select part,a,b from partitioned4;
+
+
+DROP TABLE partitioned1;
+DROP TABLE partitioned2;
+DROP TABLE partitioned3;
+DROP TABLE partitioned4;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_fetchwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_fetchwork_table.q
new file mode 100644
index 0000000..b239a42
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_fetchwork_table.q
@@ -0,0 +1,57 @@
+set hive.cli.print.header=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=more;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS ORC;
+
+insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string);
+
+insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1;
+select a,b,c from table1;
+select a,b,c,d from table1;
+select a,c,d from table1;
+select a,d from table1;
+select c from table1;
+select d from table1;
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table2(a smallint, b STRING) STORED AS ORC;
+
+insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table2 change column a a int;
+
+insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+insert into table table2 values(5000, 'new'),(90000, 'new');
+
+select a,b from table2;
+
+
+DROP TABLE table1;
+DROP TABLE table2;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_part.q
new file mode 100644
index 0000000..c120d60
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_part.q
@@ -0,0 +1,97 @@
+set hive.cli.print.header=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=none;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Non-Vectorized, MapWork, Partitioned
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
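+-- NOTE: the inserts below write both to a partition created before the ALTER
+-- (part=1) and to a brand-new partition (part=2), so the SELECTs exercise
+-- the evolved read of old files and the native read of new files together.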
+alter table partitioned1 add columns(c int, d string);
+
+insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1;
+select part,a,b,c from partitioned1;
+select part,a,b,c,d from partitioned1;
+select part,a,c,d from partitioned1;
+select part,a,d from partitioned1;
+select part,c from partitioned1;
+select part,d from partitioned1;
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned2 change column a a int;
+
+insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new');
+
+select part,a,b from partitioned2;
+
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string);
+
+insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+    (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1);
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned3;
+select part,a,b,c from partitioned3;
+select part,a,b,c,d from partitioned3;
+select part,a,c,d from partitioned3;
+select part,a,d from partitioned3;
+select part,c from partitioned3;
+select part,d from partitioned3;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int;
+
+insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+    (5000, 'new', 1),(90000, 'new', 1);
+
+select part,a,b from partitioned4;
+
+
+DROP TABLE partitioned1;
+DROP TABLE partitioned2;
+DROP TABLE partitioned3;
+DROP TABLE partitioned4;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q
new file mode 100644
index 0000000..ece45eb
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_orc_nonvec_mapwork_table.q
@@ -0,0 +1,57 @@
+set hive.cli.print.header=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=none;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS ORC;
+
+insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string);
+
+insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1;
+select a,b,c from table1;
+select a,b,c,d from table1;
+select a,c,d from table1;
+select a,d from table1;
+select c from table1;
+select d from table1;
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table2(a smallint, b STRING) STORED AS ORC;
+
+insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table2 change column a a int;
+
+insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+insert into table table2 values(5000, 'new'),(90000, 'new');
+
+select a,b from table2;
+
+
+DROP TABLE table1;
+DROP TABLE table2;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q
new file mode 100644
index 0000000..8bd6de3
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_part.q
@@ -0,0 +1,97 @@
+set hive.cli.print.header=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Vectorized, MapWork, Partitioned
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string);
+
+insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1;
+select part,a,b,c from partitioned1;
+select part,a,b,c,d from partitioned1;
+select part,a,c,d from partitioned1;
+select part,a,d from partitioned1;
+select part,c from partitioned1;
+select part,d from partitioned1;
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned2 change column a a int;
+
+insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new');
+
+select part,a,b from partitioned2;
+
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string);
+
+insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+    (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1);
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned3;
+select part,a,b,c from partitioned3;
+select part,a,b,c,d from partitioned3;
+select part,a,c,d from partitioned3;
+select part,a,d from partitioned3;
+select part,c from partitioned3;
+select part,d from partitioned3;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC;
+
+insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
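+-- NOTE: values such as 72909 and 90000 below do not fit in smallint (maximum
+-- 32,767); they are only insertable because the column is widened to int
+-- first. Files written before the ALTER still store smallint and are
+-- promoted to int at read time.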
+alter table partitioned4 change column a a int;
+
+insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+    (5000, 'new', 1),(90000, 'new', 1);
+
+select part,a,b from partitioned4;
+
+
+DROP TABLE partitioned1;
+DROP TABLE partitioned2;
+DROP TABLE partitioned3;
+DROP TABLE partitioned4;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q
new file mode 100644
index 0000000..cad22d5
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_orc_vec_mapwork_table.q
@@ -0,0 +1,57 @@
+set hive.cli.print.header=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=true;
+set hive.fetch.task.conversion=none;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Vectorized, MapWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS ORC;
+
+insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string);
+
+insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1;
+select a,b,c from table1;
+select a,b,c,d from table1;
+select a,c,d from table1;
+select a,d from table1;
+select c from table1;
+select d from table1;
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table2(a smallint, b STRING) STORED AS ORC;
+
+insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table2 change column a a int;
+
+insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+insert into table table2 values(5000, 'new'),(90000, 'new');
+
+select a,b from table2;
+
+
+DROP TABLE table1;
+DROP TABLE table2;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q
new file mode 100644
index 0000000..929524b
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_text_fetchwork_table.q
@@ -0,0 +1,57 @@
+set hive.cli.print.header=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=more;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ...
STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; + +insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string); + +insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; + +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int; + +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table table2 values(5000, 'new'),(90000, 'new'); + +select a,b from table2; + + +DROP TABLE table1; +DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q new file mode 100644 index 0000000..929524b --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_mapwork_table.q @@ -0,0 +1,57 @@ +set hive.cli.print.header=true; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.enforce.bucketing=true; +SET hive.exec.schema.evolution=true; +SET hive.vectorized.execution.enabled=false; +set hive.fetch.task.conversion=none; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE; + +insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string); + +insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1; +select a,b,c from table1; +select a,b,c,d from table1; +select a,c,d from table1; +select a,d from table1; +select c from table1; +select d from table1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE; + +insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... 
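+-- NOTE: hive.fetch.task.conversion is the MapWork/FetchWork axis of these
+-- tests: 'none' (this file) forces even simple SELECTs through map tasks,
+-- while 'more' (the fetchwork variants) lets them run as client-side,
+-- fetch-only reads; both paths must apply the same schema evolution.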
+alter table table2 change column a a int; + +insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table table2 values(5000, 'new'),(90000, 'new'); + +select a,b from table2; + + +DROP TABLE table1; +DROP TABLE table2; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q new file mode 100644 index 0000000..2d78c6d --- /dev/null +++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_part.q @@ -0,0 +1,97 @@ +set hive.cli.print.header=true; +SET hive.exec.schema.evolution=true; +SET hive.vectorized.execution.enabled=false; +set hive.fetch.task.conversion=more; +set hive.exec.dynamic.partition.mode=nonstrict; + + +-- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string); + +insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty'); + +insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred'); + +-- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1; +select part,a,b,c from partitioned1; +select part,a,b,c,d from partitioned1; +select part,a,c,d from partitioned1; +select part,a,d from partitioned1; +select part,c from partitioned1; +select part,d from partitioned1; + +-- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original'); + +-- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int; + +insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new'); + +insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new'); + +select part,a,b from partitioned2; + + +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE; + +insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original'); + +-- Table-Non-Cascade ADD COLUMNS ... 
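+-- NOTE: for TEXTFILE tables schema evolution is handled by the lazy SerDe
+-- rather than by file metadata: a stored row that is shorter than the
+-- current schema simply yields NULL for the missing trailing columns, which
+-- is what makes ADD COLUMNS work here.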
+alter table partitioned3 add columns(c int, d string);
+
+insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+    (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1);
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned3;
+select part,a,b,c from partitioned3;
+select part,a,b,c,d from partitioned3;
+select part,a,c,d from partitioned3;
+select part,a,d from partitioned3;
+select part,c from partitioned3;
+select part,d from partitioned3;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE;
+
+insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int;
+
+insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+    (5000, 'new', 1),(90000, 'new', 1);
+
+select part,a,b from partitioned4;
+
+
+DROP TABLE partitioned1;
+DROP TABLE partitioned2;
+DROP TABLE partitioned3;
+DROP TABLE partitioned4;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q
new file mode 100644
index 0000000..731cf77
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q
@@ -0,0 +1,67 @@
+set hive.cli.print.header=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=more;
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE;
+
+insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+select a,b from table1;
+
+-- ADD COLUMNS
+alter table table1 add columns(c int, d string);
+
+insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+select a,b,c,d from table1;
+
+-- ADD COLUMNS
+alter table table1 add columns(e string);
+
+insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2');
+
+select a,b,c,d,e from table1;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE;
+
+insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+select a,b from table3;
+
+-- CHANGE COLUMN ... RESTRICT
+alter table table3 change column a a int;
+
+insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+select a,b from table3;
+
+-- ADD COLUMNS ... RESTRICT
+alter table table3 add columns(e string);
+
+insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6');
+
+select a,b from table3;
+
+
+-- CHANGE COLUMN ... RESTRICT
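+-- NOTE: column a is already int at this point, so the reissued CHANGE COLUMN
+-- below is a metadata no-op; the follow-up SELECT verifies that reads are
+-- unaffected.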
diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q
new file mode 100644
index 0000000..731cf77
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_fetchwork_table.q
@@ -0,0 +1,67 @@
+set hive.cli.print.header=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=more;
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE;
+
+insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+select a,b from table1;
+
+-- ADD COLUMNS
+alter table table1 add columns(c int, d string);
+
+insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+select a,b,c,d from table1;
+
+-- ADD COLUMNS
+alter table table1 add columns(e string);
+
+insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2');
+
+select a,b,c,d,e from table1;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE;
+
+insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+select a,b from table3;
+
+-- CHANGE COLUMN ... RESTRICT
+alter table table3 change column a a int;
+
+insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+select a,b from table3;
+
+-- ADD COLUMNS ... RESTRICT
+alter table table3 add columns(e string);
+
+insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6');
+
+select a,b from table3;
+
+
+-- CHANGE COLUMN ... RESTRICT
+alter table table3 change column a a int;
+
+select a,b from table3;
+
+
+DROP TABLE table1;
+DROP TABLE table2;
+DROP TABLE table3;
\ No newline at end of file
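The CHANGE COLUMN sections in these files deliberately insert values above smallint's 32,767 ceiling (32768, 40000, 72909, 90000): rows written while column a was still smallint remain in the old physical type, so the reader has to widen them to int on read. A minimal sketch of the widening under the same session settings as the file above (demo_widen is an illustrative name):

set hive.exec.schema.evolution=true;
CREATE TABLE demo_widen(a smallint, b STRING) STORED AS TEXTFILE;
insert into table demo_widen values(32767, 'original');  -- at the smallint ceiling, stored under the old schema
alter table demo_widen change column a a int;            -- widen the declared type in place
insert into table demo_widen values(40000, 'new');       -- representable only as int
select a,b from demo_widen;                              -- both rows return; old rows are widened on read
DROP TABLE demo_widen;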
diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q
new file mode 100644
index 0000000..5f557c9
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_part.q
@@ -0,0 +1,97 @@
+set hive.cli.print.header=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=none;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE;
+
+insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string);
+
+insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred');
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1;
+select part,a,b,c from partitioned1;
+select part,a,b,c,d from partitioned1;
+select part,a,c,d from partitioned1;
+select part,a,d from partitioned1;
+select part,c from partitioned1;
+select part,d from partitioned1;
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE;
+
+insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned2 change column a a int;
+
+insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new');
+
+select part,a,b from partitioned2;
+
+
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE;
+
+insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+-- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string);
+
+insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+    (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1);
+
+-- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1;
+select part,a,b,c from partitioned1;
+select part,a,b,c,d from partitioned1;
+select part,a,c,d from partitioned1;
+select part,a,d from partitioned1;
+select part,c from partitioned1;
+select part,d from partitioned1;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE;
+
+insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+-- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int;
+
+insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+    (5000, 'new', 1),(90000, 'new', 1);
+
+select part,a,b from partitioned4;
+
+
+DROP TABLE partitioned1;
+DROP TABLE partitioned2;
+DROP TABLE partitioned3;
+DROP TABLE partitioned4;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q
new file mode 100644
index 0000000..155602e
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_text_nonvec_mapwork_table.q
@@ -0,0 +1,67 @@
+set hive.cli.print.header=true;
+SET hive.exec.schema.evolution=true;
+SET hive.vectorized.execution.enabled=false;
+set hive.fetch.task.conversion=none;
+
+-- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE;
+
+insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original');
+
+select a,b from table1;
+
+-- ADD COLUMNS
+alter table table1 add columns(c int, d string);
+
+insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty');
+
+select a,b,c,d from table1;
+
+-- ADD COLUMNS
+alter table table1 add columns(e string);
+
+insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2');
+
+select a,b,c,d,e from table1;
+
+
+--
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE;
+
+insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original');
+
+select a,b from table3;
+
+-- CHANGE COLUMN ... RESTRICT
+alter table table3 change column a a int;
+
+insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new');
+
+select a,b from table3;
+
+-- ADD COLUMNS ... RESTRICT
+alter table table3 add columns(e string);
+
+insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6');
+
+select a,b from table3;
+
+
+-- CHANGE COLUMN ... RESTRICT
+alter table table3 change column a a int;
+
+select a,b from table3;
+
+
+DROP TABLE table1;
+DROP TABLE table2;
+DROP TABLE table3;
\ No newline at end of file
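The golden outputs that follow cover the ORC ACID variants. As their header comments note, the matching .q files set hive.exec.schema.evolution=false on purpose: transactional tables apply schema evolution unconditionally, so the flag must not matter there. A minimal ACID sketch, assuming the usual transactional session settings (demo_acid is an illustrative name):

set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.exec.schema.evolution=false;  -- deliberately off; ACID evolves the schema anyway
CREATE TABLE demo_acid(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
insert into table demo_acid values(1, 'original');
alter table demo_acid add columns(c int);
update demo_acid set c = 99;           -- rewrites the pre-ALTER row under the new schema
select a,b,c from demo_acid;           -- returns: 1 original 99
DROP TABLE demo_acid;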
diff --git ql/src/test/results/clientpositive/schema_evol_orc_acid_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_orc_acid_mapwork_part.q.out
new file mode 100644
index 0000000..a922175
--- /dev/null
+++ ql/src/test/results/clientpositive/schema_evol_orc_acid_mapwork_part.q.out
@@ -0,0 +1,1037 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@partitioned1@part=1
+POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@partitioned1@part=1
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned1
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: 
default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 
ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: 
partitioned3 PARTITION(part=2).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: 
query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
UPDATE New Columns +--- +CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__11 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__11 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned5 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned5 +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned5 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__12 +PREHOOK: Output: default@partitioned5@part=2 +POSTHOOK: query: insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__12 +POSTHOOK: Output: default@partitioned5@part=2 +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).b SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).c EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).d SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__13 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__13 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: 
Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).c EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).d SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select part,a,b,c,d from partitioned5 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: -- UPDATE New Columns +update partitioned5 set c=99 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +PREHOOK: Output: default@partitioned5@part=1 +PREHOOK: Output: default@partitioned5@part=2 +POSTHOOK: query: -- UPDATE New Columns +update partitioned5 set c=99 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Output: default@partitioned5@part=2 +row__id a b _c3 d part +PREHOOK: query: select part,a,b,c,d from partitioned5 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original 99 NULL +1 2 original 99 NULL +1 3 original 99 NULL +1 4 original 99 NULL +1 5 new 99 hundred +1 6 new 99 two hundred +2 1 new 99 ten +2 2 new 99 twenty +2 3 new 99 thirty +2 4 new 99 forty +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DELETE where old column +--- +CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@partitioned6@part=1 +POSTHOOK: query: insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned6 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned6 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned6 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@partitioned6@part=2 +POSTHOOK: query: insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@partitioned6@part=2 +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).c EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).d SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__16 +PREHOOK: Output: default@partitioned6@part=1 +POSTHOOK: query: insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__16 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: 
Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).c EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).d SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select part,a,b,c,d from partitioned6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: -- DELETE where old column +delete from partitioned6 where a = 2 or a = 4 or a = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +PREHOOK: Output: default@partitioned6@part=1 +PREHOOK: Output: default@partitioned6@part=2 +POSTHOOK: query: -- DELETE where old column +delete from partitioned6 where a = 2 or a = 4 or a = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Output: default@partitioned6@part=2 +row__id part +PREHOOK: query: select part,a,b,c,d from partitioned6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 3 original NULL NULL +1 5 new 100 hundred +2 1 new 10 ten +2 3 new 30 thirty +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned7 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DELETE where new column +--- +CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned7 +PREHOOK: query: insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__17 +PREHOOK: Output: default@partitioned7@part=1 +POSTHOOK: query: insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__17 +POSTHOOK: Output: default@partitioned7@part=1 +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).a EXPRESSION [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).b SIMPLE [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned7 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned7 +PREHOOK: Output: default@partitioned7 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned7 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Output: default@partitioned7 +PREHOOK: query: insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__18 +PREHOOK: Output: default@partitioned7@part=2 +POSTHOOK: query: insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__18 +POSTHOOK: Output: default@partitioned7@part=2 +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).a EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).b SIMPLE [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).c EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).d SIMPLE [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__19 +PREHOOK: Output: default@partitioned7@part=1 +POSTHOOK: query: insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__19 +POSTHOOK: Output: default@partitioned7@part=1 +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).a EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: 
Lineage: partitioned7 PARTITION(part=1).b SIMPLE [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).c EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).d SIMPLE [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select part,a,b,c,d from partitioned7 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned7 +PREHOOK: Input: default@partitioned7@part=1 +PREHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Input: default@partitioned7@part=1 +POSTHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: -- DELETE where new column +delete from partitioned7 where a = 1 or c = 30 or c == 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned7 +PREHOOK: Input: default@partitioned7@part=1 +PREHOOK: Input: default@partitioned7@part=2 +PREHOOK: Output: default@partitioned7@part=1 +PREHOOK: Output: default@partitioned7@part=2 +POSTHOOK: query: -- DELETE where new column +delete from partitioned7 where a = 1 or c = 30 or c == 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Input: default@partitioned7@part=1 +POSTHOOK: Input: default@partitioned7@part=2 +POSTHOOK: Output: default@partitioned7@part=1 +POSTHOOK: Output: default@partitioned7@part=2 +row__id part +PREHOOK: query: select part,a,b,c,d from partitioned7 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned7 +PREHOOK: Input: default@partitioned7@part=1 +PREHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Input: default@partitioned7@part=1 +POSTHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +part a b c d +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 6 new 200 two hundred +2 2 new 20 twenty +2 4 new 40 forty +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: DROP TABLE partitioned5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned5 +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: DROP TABLE partitioned5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: DROP TABLE partitioned6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned6 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: DROP TABLE partitioned6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: DROP TABLE partitioned7 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned7 +PREHOOK: Output: default@partitioned7 +POSTHOOK: query: DROP TABLE partitioned7 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Output: default@partitioned7 diff --git ql/src/test/results/clientpositive/schema_evol_orc_acid_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_orc_acid_mapwork_table.q.out new file mode 100644 index 0000000..4885aeb --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_orc_acid_mapwork_table.q.out @@ -0,0 +1,651 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 
thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table5 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table5 +PREHOOK: query: insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@table5 +POSTHOOK: query: insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@table5 +POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table5 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table5 +PREHOOK: Output: default@table5 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table5 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table5 +POSTHOOK: Output: default@table5 +PREHOOK: query: insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@table5 +POSTHOOK: query: insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@table5 +POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table5.c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table5.d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@table5 +POSTHOOK: query: insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@table5 +POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table5.c EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table5.d SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table5 +PREHOOK: type: QUERY +PREHOOK: Input: default@table5 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table5 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- UPDATE New Columns +update table5 set c=99 +PREHOOK: type: QUERY +PREHOOK: Input: default@table5 +PREHOOK: Output: default@table5 +POSTHOOK: query: -- UPDATE New Columns +update table5 set c=99 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table5 +POSTHOOK: Output: default@table5 +row__id a b _c3 d +PREHOOK: query: select a,b,c,d from table5 +PREHOOK: type: QUERY +PREHOOK: Input: default@table5 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table5 +#### A masked pattern was here #### +a b c d +1 new 99 ten +1 original 99 NULL +2 new 99 twenty +2 original 99 NULL +3 new 99 thirty +3 original 99 NULL +4 new 99 forty +4 original 99 NULL +5 new 99 hundred +6 new 99 two hundred +PREHOOK: query: 
-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table6 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table6 +PREHOOK: query: insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@table6 +POSTHOOK: query: insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@table6 +POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table6.b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table6 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table6 +PREHOOK: Output: default@table6 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table6 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table6 +POSTHOOK: Output: default@table6 +PREHOOK: query: insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__11 +PREHOOK: Output: default@table6 +POSTHOOK: query: insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__11 +POSTHOOK: Output: default@table6 +POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table6.b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table6.c EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table6.d SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__12 +PREHOOK: Output: default@table6 +POSTHOOK: query: insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__12 +POSTHOOK: Output: default@table6 +POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table6.b SIMPLE 
[(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table6.c EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table6.d SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table6 +PREHOOK: type: QUERY +PREHOOK: Input: default@table6 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table6 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- DELETE where old column +delete from table6 where a = 2 or a = 4 or a = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@table6 +PREHOOK: Output: default@table6 +POSTHOOK: query: -- DELETE where old column +delete from table6 where a = 2 or a = 4 or a = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table6 +POSTHOOK: Output: default@table6 +row__id +PREHOOK: query: select a,b,c,d from table6 +PREHOOK: type: QUERY +PREHOOK: Input: default@table6 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table6 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +3 new 30 thirty +3 original NULL NULL +5 new 100 hundred +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table7 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table7 +PREHOOK: query: insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__13 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__13 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table7 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table7 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 +PREHOOK: query: insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table7.c EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table7.d SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table7.c EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table7.d SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table7 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: -- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 +row__id +PREHOOK: query: select a,b,c,d from table7 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +#### A masked pattern was here #### +a b c d +2 new 20 twenty +2 original NULL NULL +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +6 new 200 two hundred 
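The table1 through table7 outputs above all verify the same reader-side reconciliation on a non-partitioned ACID table: rows written before an ALTER TABLE keep the ORC schema of the file they were written into, and the reader maps them onto the current table schema at scan time, filling added columns with NULL and widening changed types (table2's smallint rows survive the change to int, and values such as 32768, 40000, and 90000 only become storable after it). A minimal sketch of the pattern these golden files check, under the assumption of a Hive build with ACID enabled — evol_demo and its columns are illustrative names, not part of this patch:

-- ACID ORC table; schema evolution is always applied when reading ACID tables.
CREATE TABLE evol_demo(a INT, b STRING)
CLUSTERED BY (a) INTO 2 BUCKETS
STORED AS ORC TBLPROPERTIES ('transactional'='true');

INSERT INTO TABLE evol_demo VALUES (1, 'original');

-- Only the metastore schema changes; ORC files already written are untouched.
ALTER TABLE evol_demo ADD COLUMNS(c INT, d STRING);

INSERT INTO TABLE evol_demo VALUES (2, 'new', 20, 'twenty');

-- The pre-ALTER row reads back as 1, 'original', NULL, NULL.
SELECT a, b, c, d FROM evol_demo;

-- A predicate on an added column can never match pre-ALTER rows,
-- since NULL = 20 is not true; only the post-ALTER row is deleted here.
DELETE FROM evol_demo WHERE c = 20;

The same reasoning explains the DELETE results above: "delete from table7 where a = 1 or c = 30 or c == 100" removes both rows with a = 1 (the old-column predicate sees every row), but the c predicates remove only post-ALTER rows, leaving every 'original' row with a != 1 in place.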
+PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: DROP TABLE table5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table5 +PREHOOK: Output: default@table5 +POSTHOOK: query: DROP TABLE table5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table5 +POSTHOOK: Output: default@table5 +PREHOOK: query: DROP TABLE table6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table6 +PREHOOK: Output: default@table6 +POSTHOOK: query: DROP TABLE table6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table6 +POSTHOOK: Output: default@table6 +PREHOOK: query: DROP TABLE table7 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: DROP TABLE table7 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 diff --git ql/src/test/results/clientpositive/schema_evol_orc_acidvec_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_orc_acidvec_mapwork_part.q.out new file mode 100644 index 0000000..c5af165 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_orc_acidvec_mapwork_part.q.out @@ -0,0 +1,1037 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Partitioned +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Partitioned +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 
PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: 
partitioned3 PARTITION(part=2).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: 
query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
UPDATE New Columns +--- +CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__11 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__11 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned5 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned5 +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned5 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__12 +PREHOOK: Output: default@partitioned5@part=2 +POSTHOOK: query: insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__12 +POSTHOOK: Output: default@partitioned5@part=2 +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).b SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).c EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).d SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__13 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__13 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: 
Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).c EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).d SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select part,a,b,c,d from partitioned5 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: -- UPDATE New Columns +update partitioned5 set c=99 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +PREHOOK: Output: default@partitioned5@part=1 +PREHOOK: Output: default@partitioned5@part=2 +POSTHOOK: query: -- UPDATE New Columns +update partitioned5 set c=99 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Output: default@partitioned5@part=2 +row__id a b _c3 d part +PREHOOK: query: select part,a,b,c,d from partitioned5 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original 99 NULL +1 2 original 99 NULL +1 3 original 99 NULL +1 4 original 99 NULL +1 5 new 99 hundred +1 6 new 99 two hundred +2 1 new 99 ten +2 2 new 99 twenty +2 3 new 99 thirty +2 4 new 99 forty +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DELETE where old column +--- +CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@partitioned6@part=1 +POSTHOOK: query: insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned6 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned6 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned6 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@partitioned6@part=2 +POSTHOOK: query: insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@partitioned6@part=2 +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).c EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).d SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__16 +PREHOOK: Output: default@partitioned6@part=1 +POSTHOOK: query: insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__16 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: 
Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).c EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).d SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select part,a,b,c,d from partitioned6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: -- DELETE where old column +delete from partitioned6 where a = 2 or a = 4 or a = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +PREHOOK: Output: default@partitioned6@part=1 +PREHOOK: Output: default@partitioned6@part=2 +POSTHOOK: query: -- DELETE where old column +delete from partitioned6 where a = 2 or a = 4 or a = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Output: default@partitioned6@part=2 +row__id part +PREHOOK: query: select part,a,b,c,d from partitioned6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 3 original NULL NULL +1 5 new 100 hundred +2 1 new 10 ten +2 3 new 30 thirty +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned7 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DELETE where new column +--- +CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned7 +PREHOOK: query: insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__17 +PREHOOK: Output: default@partitioned7@part=1 +POSTHOOK: query: insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__17 +POSTHOOK: Output: default@partitioned7@part=1 +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).a EXPRESSION [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).b SIMPLE [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned7 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned7 +PREHOOK: Output: default@partitioned7 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned7 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Output: default@partitioned7 +PREHOOK: query: insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__18 +PREHOOK: Output: default@partitioned7@part=2 +POSTHOOK: query: insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__18 +POSTHOOK: Output: default@partitioned7@part=2 +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).a EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).b SIMPLE [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).c EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).d SIMPLE [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__19 +PREHOOK: Output: default@partitioned7@part=1 +POSTHOOK: query: insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__19 +POSTHOOK: Output: default@partitioned7@part=1 +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).a EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: 
Lineage: partitioned7 PARTITION(part=1).b SIMPLE [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).c EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).d SIMPLE [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select part,a,b,c,d from partitioned7 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned7 +PREHOOK: Input: default@partitioned7@part=1 +PREHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Input: default@partitioned7@part=1 +POSTHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: -- DELETE where new column +delete from partitioned7 where a = 1 or c = 30 or c == 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned7 +PREHOOK: Input: default@partitioned7@part=1 +PREHOOK: Input: default@partitioned7@part=2 +PREHOOK: Output: default@partitioned7@part=1 +PREHOOK: Output: default@partitioned7@part=2 +POSTHOOK: query: -- DELETE where new column +delete from partitioned7 where a = 1 or c = 30 or c == 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Input: default@partitioned7@part=1 +POSTHOOK: Input: default@partitioned7@part=2 +POSTHOOK: Output: default@partitioned7@part=1 +POSTHOOK: Output: default@partitioned7@part=2 +row__id part +PREHOOK: query: select part,a,b,c,d from partitioned7 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned7 +PREHOOK: Input: default@partitioned7@part=1 +PREHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Input: default@partitioned7@part=1 +POSTHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +part a b c d +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 6 new 200 two hundred +2 2 new 20 twenty +2 4 new 40 forty +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: DROP TABLE partitioned5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned5 +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: DROP TABLE partitioned5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: DROP TABLE partitioned6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned6 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: DROP TABLE partitioned6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: DROP TABLE partitioned7 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned7 +PREHOOK: Output: default@partitioned7 +POSTHOOK: query: DROP TABLE partitioned7 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Output: default@partitioned7 diff --git ql/src/test/results/clientpositive/schema_evol_orc_acidvec_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_orc_acidvec_mapwork_table.q.out new file mode 100644 index 0000000..2b1e5c3 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_orc_acidvec_mapwork_table.q.out @@ -0,0 +1,651 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Table +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Table +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 
thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table5 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table5 +PREHOOK: query: insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@table5 +POSTHOOK: query: insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@table5 +POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table5 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table5 +PREHOOK: Output: default@table5 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table5 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table5 +POSTHOOK: Output: default@table5 +PREHOOK: query: insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@table5 +POSTHOOK: query: insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@table5 +POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table5.c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table5.d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@table5 +POSTHOOK: query: insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@table5 +POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table5.c EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table5.d SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table5 +PREHOOK: type: QUERY +PREHOOK: Input: default@table5 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table5 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- UPDATE New Columns +update table5 set c=99 +PREHOOK: type: QUERY +PREHOOK: Input: default@table5 +PREHOOK: Output: default@table5 +POSTHOOK: query: -- UPDATE New Columns +update table5 set c=99 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table5 +POSTHOOK: Output: default@table5 +row__id a b _c3 d +PREHOOK: query: select a,b,c,d from table5 +PREHOOK: type: QUERY +PREHOOK: Input: default@table5 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table5 +#### A masked pattern was here #### +a b c d +1 new 99 ten +1 original 99 NULL +2 new 99 twenty +2 original 99 NULL +3 new 99 thirty +3 original 99 NULL +4 new 99 forty +4 original 99 NULL +5 new 99 hundred +6 new 99 two hundred +PREHOOK: query: 
-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table6 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table6 +PREHOOK: query: insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@table6 +POSTHOOK: query: insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@table6 +POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table6.b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table6 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table6 +PREHOOK: Output: default@table6 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table6 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table6 +POSTHOOK: Output: default@table6 +PREHOOK: query: insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__11 +PREHOOK: Output: default@table6 +POSTHOOK: query: insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__11 +POSTHOOK: Output: default@table6 +POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table6.b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table6.c EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table6.d SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__12 +PREHOOK: Output: default@table6 +POSTHOOK: query: insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__12 +POSTHOOK: Output: default@table6 +POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table6.b SIMPLE 
[(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table6.c EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table6.d SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table6 +PREHOOK: type: QUERY +PREHOOK: Input: default@table6 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table6 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- DELETE where old column +delete from table6 where a = 2 or a = 4 or a = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@table6 +PREHOOK: Output: default@table6 +POSTHOOK: query: -- DELETE where old column +delete from table6 where a = 2 or a = 4 or a = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table6 +POSTHOOK: Output: default@table6 +row__id +PREHOOK: query: select a,b,c,d from table6 +PREHOOK: type: QUERY +PREHOOK: Input: default@table6 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table6 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +3 new 30 thirty +3 original NULL NULL +5 new 100 hundred +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table7 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table7 +PREHOOK: query: insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__13 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__13 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table7 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table7 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 +PREHOOK: query: insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table7.c EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table7.d SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table7.c EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table7.d SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table7 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: -- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 +row__id +PREHOOK: query: select a,b,c,d from table7 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +#### A masked pattern was here #### +a b c d +2 new 20 twenty +2 original NULL NULL +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +6 new 200 two hundred 
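Taken together, the table1..table7 outputs above verify one contract: rows written before an ALTER TABLE remain readable under the evolved schema, with the trailing columns their writer never saw defaulting to NULL, and ACID UPDATE/DELETE may reference those new columns even for pre-ALTER rows. The following is a minimal sketch of that contract, not part of the patch; the table name is illustrative, and it assumes an ACID-enabled session (hive.support.concurrency plus the DbTxnManager).

-- Illustrative condensation of the scenarios verified above; table name and
-- session settings are assumptions, not part of the patch.
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;

CREATE TABLE demo(a INT, b STRING)
CLUSTERED BY (a) INTO 2 BUCKETS
STORED AS ORC TBLPROPERTIES ('transactional'='true');

INSERT INTO TABLE demo VALUES (1, 'original');

-- Widen the table schema; the ORC file already written keeps its old footer.
ALTER TABLE demo ADD COLUMNS (c INT, d STRING);
INSERT INTO TABLE demo VALUES (2, 'new', 20, 'twenty');

-- Pre-ALTER rows surface NULL for the columns their writer never saw:
--   1  original  NULL  NULL
--   2  new       20    twenty
SELECT a, b, c, d FROM demo;

-- DML can target the new columns; old rows participate with c, d read as NULL.
UPDATE demo SET c = 99;
DELETE FROM demo WHERE c = 99 AND b = 'original';

The partitionedN variants exercise the same pattern per partition: a non-cascade ALTER changes only the table-level descriptor, so each existing partition keeps the column list it was written with and the reader reconciles the two at scan time.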
+PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: DROP TABLE table5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table5 +PREHOOK: Output: default@table5 +POSTHOOK: query: DROP TABLE table5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table5 +POSTHOOK: Output: default@table5 +PREHOOK: query: DROP TABLE table6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table6 +PREHOOK: Output: default@table6 +POSTHOOK: query: DROP TABLE table6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table6 +POSTHOOK: Output: default@table6 +PREHOOK: query: DROP TABLE table7 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: DROP TABLE table7 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 diff --git ql/src/test/results/clientpositive/schema_evol_orc_nonvec_fetchwork_part.q.out ql/src/test/results/clientpositive/schema_evol_orc_nonvec_fetchwork_part.q.out new file mode 100644 index 0000000..dc2fd9a --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_orc_nonvec_fetchwork_part.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: 
default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 
ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 
+PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/schema_evol_orc_nonvec_fetchwork_table.q.out ql/src/test/results/clientpositive/schema_evol_orc_nonvec_fetchwork_table.q.out new file mode 100644 index 0000000..93f4dcc --- /dev/null +++ 
ql/src/test/results/clientpositive/schema_evol_orc_nonvec_fetchwork_table.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, 
comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: 
Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_orc_nonvec_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_orc_nonvec_mapwork_part.q.out new file mode 100644 index 0000000..dc2fd9a --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_orc_nonvec_mapwork_part.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: 
default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 
ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 
+PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/schema_evol_orc_nonvec_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_orc_nonvec_mapwork_table.q.out new file mode 100644 index 0000000..df9494f --- /dev/null +++ 
ql/src/test/results/clientpositive/schema_evol_orc_nonvec_mapwork_table.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, 
comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: 
Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_orc_vec_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_orc_vec_mapwork_part.q.out new file mode 100644 index 0000000..9f1fda8 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_orc_vec_mapwork_part.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
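
[Reviewer note] The part=1 files above were written with the two-column (a, b) schema, and the non-cascade ALTER only updated table-level metadata; the SELECT permutations that follow therefore depend on the reader padding the missing c and d columns with NULL. A minimal sketch of that padding idea, with illustrative names (this is not Hive's actual reader code):

import java.util.Arrays;
import java.util.List;

public class NullPaddingDemo {
  // Project a row written under the older file schema onto the reader's
  // schema; columns the file never had come back as NULL.
  static Object[] evolveRow(Object[] fileRow, List<String> fileCols, List<String> readerCols) {
    Object[] out = new Object[readerCols.size()];
    for (int i = 0; i < readerCols.size(); i++) {
      int idx = fileCols.indexOf(readerCols.get(i));
      out[i] = (idx >= 0) ? fileRow[idx] : null; // absent column -> NULL default
    }
    return out;
  }

  public static void main(String[] args) {
    List<String> fileCols = Arrays.asList("a", "b");             // schema the old partition was written with
    List<String> readerCols = Arrays.asList("a", "b", "c", "d"); // table schema after ADD COLUMNS
    Object[] padded = evolveRow(new Object[]{1, "original"}, fileCols, readerCols);
    System.out.println(Arrays.toString(padded)); // [1, original, null, null]
  }
}

This matches the "1 original NULL NULL" rows verified by the golden output below.
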
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: 
default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 
ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
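
[Reviewer note] partitioned2's part=1 data was written while a was still smallint; after the non-cascade CHANGE COLUMN the reader schema says int, and the later inserts (32768, 40000, 72909, 90000) only fit because of that widening. A hedged sketch of why this direction is always safe (plain Java, not Hive's conversion code):

public class WideningDemo {
  // smallint -> int is a lossless implicit widening, just as it is for the
  // reader schema here: every 2-byte value fits in 4 bytes.
  static int readAsInt(short fileValue) {
    return fileValue;
  }

  public static void main(String[] args) {
    short original = 6737;                        // written under the smallint schema
    System.out.println(readAsInt(original));      // 6737, unchanged
    System.out.println(32768 > Short.MAX_VALUE);  // true: this insert needed the int column
  }
}
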
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 
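
[Reviewer note] The partitioned3 insert above uses dynamic partitioning: the trailing value of each VALUES tuple selects the target partition, which is why a single statement produced both part=1 and part=2 outputs. A small illustrative sketch of that routing (not Hive's FileSinkOperator):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DynamicPartitionDemo {
  public static void main(String[] args) {
    // Rows as inserted above: data columns first, dynamic partition key last.
    Object[][] rows = {
        {1, "new", 10, "ten", 2}, {5, "new", 100, "hundred", 1},
        {6, "new", 200, "two hundred", 1}, {2, "new", 20, "twenty", 2}
    };
    Map<Integer, List<Object[]>> byPartition = new HashMap<>();
    for (Object[] r : rows) {
      Integer part = (Integer) r[r.length - 1];        // trailing column selects the partition
      byPartition.computeIfAbsent(part, k -> new ArrayList<>())
                 .add(Arrays.copyOf(r, r.length - 1)); // only data columns are written
    }
    byPartition.forEach((p, rs) -> System.out.println("part=" + p + ": " + rs.size() + " rows"));
  }
}
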
+PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/schema_evol_orc_vec_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_orc_vec_mapwork_table.q.out new file mode 100644 index 0000000..aef9a74 --- /dev/null +++ 
ql/src/test/results/clientpositive/schema_evol_orc_vec_mapwork_table.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] 
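
[Reviewer note] This file exercises the vectorized ORC read path, where a column added after the file was written can be satisfied for a whole batch with a single repeating NULL entry instead of per-row work. A simplified sketch of that idea (field names are illustrative, loosely modeled on a column-vector layout; not the actual VectorizedRowBatch API):

public class RepeatingNullColumnDemo {
  static final int BATCH_SIZE = 1024;

  // Simplified column-vector layout: one flag says "every row repeats
  // entry 0", so a batch-wide NULL costs two writes instead of 1024.
  long[] vector = new long[BATCH_SIZE];
  boolean[] isNull = new boolean[BATCH_SIZE];
  boolean isRepeating;

  void fillWithNull() {
    isRepeating = true; // all rows share entry 0
    isNull[0] = true;   // and entry 0 is NULL
  }

  public static void main(String[] args) {
    RepeatingNullColumnDemo c = new RepeatingNullColumnDemo();
    c.fillWithNull();
    System.out.println("repeating=" + c.isRepeating + " nullAt0=" + c.isNull[0]);
  }
}
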
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: 
Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_text_fetchwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_fetchwork_table.q.out new file mode 100644 index 0000000..f849004 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_text_fetchwork_table.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
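
[Reviewer note] For the TEXTFILE variants, rows written before ADD COLUMNS are simply shorter delimited lines, so NULL padding falls out of positional parsing. An illustrative sketch (not LazySimpleSerDe), assuming Hive's default ^A field delimiter:

import java.util.Arrays;

public class TextRowPaddingDemo {
  // Parse a delimited text row against a wider reader schema; fields the
  // row never had come back as NULL.
  static String[] parse(String line, int readerColumnCount) {
    String[] fields = line.split("\u0001", -1); // ^A, Hive's default field delimiter
    String[] out = new String[readerColumnCount];
    for (int i = 0; i < readerColumnCount; i++) {
      out[i] = (i < fields.length) ? fields[i] : null;
    }
    return out;
  }

  public static void main(String[] args) {
    // A row written before ADD COLUMNS has only two fields.
    System.out.println(Arrays.toString(parse("1\u0001original", 4)));
    // -> [1, original, null, null]
  }
}
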
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 
thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
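
[Reviewer note] With text storage the bytes on disk never change; CHANGE COLUMN only alters the type the same digits are parsed as, so a value like 40000 that cannot fit a smallint parses cleanly as int afterward. A plain-Java sketch of the range point (Hive's lazy parsing differs in that out-of-range text reads as NULL rather than throwing):

public class TextReparseDemo {
  public static void main(String[] args) {
    String stored = "40000"; // the bytes in the text file are unchanged by ALTER

    // Parsed under the new reader schema (int): succeeds.
    System.out.println(Integer.parseInt(stored)); // 40000

    // Parsed under the old schema (smallint): out of range.
    try {
      Short.parseShort(stored);
    } catch (NumberFormatException e) {
      System.out.println("does not fit smallint: " + e.getMessage());
    }
  }
}
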
+alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_text_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_mapwork_table.q.out new file mode 100644 index 0000000..f849004 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_text_mapwork_table.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 
+PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: 
default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out new file mode 100644 index 0000000..44ce24e --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_part.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: 
default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty 
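The permutation SELECTs above pin down the reader-side contract these expected-output files encode: a row whose data file predates an ADD COLUMNS reads back with NULL in the added columns, and a column widened by CHANGE COLUMN (smallint to int here) reads back as the wider type. A minimal illustrative Java sketch of those two rules follows; this is not code from the patch, and the class and method names are invented:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class SchemaEvolutionSketch {
  // Pad a row written under an older, narrower schema out to the reader's
  // column count; columns absent from the data file surface as NULL.
  static List<Object> padToReaderSchema(List<Object> fileRow, int readerWidth) {
    List<Object> out = new ArrayList<>(readerWidth);
    for (int i = 0; i < readerWidth; i++) {
      out.add(i < fileRow.size() ? fileRow.get(i) : null);
    }
    return out;
  }

  // Widen a smallint (Short) value to the reader's int (Integer) type, the
  // conversion exercised by the "change column a a int" sections.
  static Integer widenSmallintToInt(Short fileValue) {
    return fileValue == null ? null : Integer.valueOf(fileValue.intValue());
  }

  public static void main(String[] args) {
    // A row written before the table gained columns c and d:
    System.out.println(padToReaderSchema(Arrays.asList(1, "original"), 4));
    // prints [1, original, null, null], matching the NULL-defaulted rows above
    System.out.println(widenSmallintToInt((short) 6737)); // prints 6737
  }
}

The real read path resolves the file schema against the table or partition descriptor rather than bare Lists; the sketch only isolates the NULL-padding and type-widening behavior that the golden results above assert.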
+PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out new file mode 100644 index 0000000..4003c20 --- /dev/null +++ 
ql/src/test/results/clientpositive/schema_evol_text_nonvec_fetchwork_table.q.out @@ -0,0 +1,297 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 original +2 original +3 original +4 original +PREHOOK: query: -- ADD COLUMNS +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- ADD COLUMNS +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original
NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +PREHOOK: query: -- ADD COLUMNS +alter table table1 add columns(e string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- ADD COLUMNS +alter table table1 add columns(e string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: select a,b,c,d,e from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d e +1 new 10 ten NULL +1 original NULL NULL NULL +2 new 20 twenty NULL +2 original NULL NULL NULL +3 new 30 thirty NULL +3 original NULL NULL NULL +4 new 40 forty NULL +4 original NULL NULL NULL +5 new 100 hundred another1 +6 new 200 two hundred another2 +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table3 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +3 original +4 original +6737 original +PREHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +6737 original +72909 new +PREHOOK: query: -- ADD COLUMNS ... RESTRICT +alter table table3 add columns(e string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- ADD COLUMNS ... RESTRICT +alter table table3 add columns(e string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: -- CHANGE COLUMN ...
RESTRICT +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE table3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: DROP TABLE table3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out new file mode 100644 index 0000000..44f5822 --- /dev/null +++ ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_part.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 
6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: 
Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
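-- [Editorial aside] Hedged sketch of the NULL-defaulting rule these SELECT permutations verify; demo_add is an illustrative name, not part of the test. With a non-CASCADE ADD COLUMNS, a partition written before the alter has no c/d data and keeps its old descriptor, so those columns read back as NULL, while a partition created afterwards returns real values:
CREATE TABLE demo_add(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE;
insert into table demo_add partition(part=1) values(1, 'original');
alter table demo_add add columns(c int, d string);
insert into table demo_add partition(part=2) values(1, 'new', 10, 'ten');
select part,c,d from demo_add;   -- part=1 row: NULL NULL; part=2 row: 10 ten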
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
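-- [Editorial aside] The DYNAMIC INSERT variation below differs from the static one only in routing: the trailing value in each tuple is the dynamic partition key. An abbreviated, hedged sketch of the step that follows (assumes hive.exec.dynamic.partition.mode=nonstrict, which the .q file is expected to set):
insert into table partitioned4 partition(part) values(72909, 'new', 2),(90000, 'new', 1);
-- rows with trailing 2 land in part=2 (created after the widen, values intact);
-- rows with trailing 1 land in the pre-alter part=1, where 90000 again reads
-- back as NULL on this TEXTFILE path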
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out new file mode 100644 index 0000000..4003c20 --- /dev/null +++ 
ql/src/test/results/clientpositive/schema_evol_text_nonvec_mapwork_table.q.out @@ -0,0 +1,297 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 original +2 original +3 original +4 original +PREHOOK: query: -- ADD COLUMNS +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- ADD COLUMNS +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original
NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +PREHOOK: query: -- ADD COLUMNS +alter table table1 add columns(e string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- ADD COLUMNS +alter table table1 add columns(e string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: select a,b,c,d,e from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d e +1 new 10 ten NULL +1 original NULL NULL NULL +2 new 20 twenty NULL +2 original NULL NULL NULL +3 new 30 thirty NULL +3 original NULL NULL NULL +4 new 40 forty NULL +4 original NULL NULL NULL +5 new 100 hundred another1 +6 new 200 two hundred another2 +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table3 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +3 original +4 original +6737 original +PREHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +6737 original +72909 new +PREHOOK: query: -- ADD COLUMNS ... RESTRICT +alter table table3 add columns(e string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- ADD COLUMNS ... RESTRICT +alter table table3 add columns(e string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: -- CHANGE COLUMN ...
RESTRICT +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE table3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: DROP TABLE table3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_acid_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_acid_mapwork_part.q.out new file mode 100644 index 0000000..a922175 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_orc_acid_mapwork_part.q.out @@ -0,0 +1,1037 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Partitioned +-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID. +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ...
STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 
PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
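-- [Editorial aside] Contrast sketch with the TEXTFILE variant earlier in this patch; demo_acid is an illustrative name, and an ACID-enabled session is assumed (hive.support.concurrency=true with the DbTxnManager). On the ACID ORC path schema evolution is always applied, so stored smallint data is converted to the widened int reader schema and values out of smallint range survive:
CREATE TABLE demo_acid(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true');
insert into table demo_acid partition(part=1) values(1000, 'original');
alter table demo_acid change column a a int;
insert into table demo_acid partition(part=1) values(90000, 'new');
select part,a,b from demo_acid;   -- expected: 1 90000 new, not 1 NULL new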
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: 
partitioned3 PARTITION(part=2).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: 
query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
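-- [Editorial aside] Dynamic-partition form of the same ACID widen; a hedged sketch of the outcome, abbreviated from the test's own insert. Each row is routed by its trailing key into that partition's ACID delta files, and reads resolve every partition through the evolved int table schema:
insert into table partitioned4 partition(part) values(72909, 'new', 2),(90000, 'new', 1);
select part,a,b from partitioned4;   -- expected rows include: 1 90000 new and 2 72909 new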
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
UPDATE New Columns +--- +CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__11 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__11 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned5 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned5 +PREHOOK: Output: default@partitioned5 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned5 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Output: default@partitioned5 +PREHOOK: query: insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__12 +PREHOOK: Output: default@partitioned5@part=2 +POSTHOOK: query: insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__12 +POSTHOOK: Output: default@partitioned5@part=2 +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).b SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).c EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=2).d SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__13 +PREHOOK: Output: default@partitioned5@part=1 +POSTHOOK: query: insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__13 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: 
Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).c EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned5 PARTITION(part=1).d SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select part,a,b,c,d from partitioned5 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: -- UPDATE New Columns +update partitioned5 set c=99 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +PREHOOK: Output: default@partitioned5@part=1 +PREHOOK: Output: default@partitioned5@part=2 +POSTHOOK: query: -- UPDATE New Columns +update partitioned5 set c=99 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +POSTHOOK: Output: default@partitioned5@part=1 +POSTHOOK: Output: default@partitioned5@part=2 +row__id a b _c3 d part +PREHOOK: query: select part,a,b,c,d from partitioned5 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned5 +PREHOOK: Input: default@partitioned5@part=1 +PREHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned5 +POSTHOOK: Input: default@partitioned5@part=1 +POSTHOOK: Input: default@partitioned5@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original 99 NULL +1 2 original 99 NULL +1 3 original 99 NULL +1 4 original 99 NULL +1 5 new 99 hundred +1 6 new 99 two hundred +2 1 new 99 ten +2 2 new 99 twenty +2 3 new 99 thirty +2 4 new 99 forty +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DELETE where old column +--- +CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@partitioned6@part=1 +POSTHOOK: query: insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned6 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned6 +PREHOOK: Output: default@partitioned6 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned6 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Output: default@partitioned6 +PREHOOK: query: insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@partitioned6@part=2 +POSTHOOK: query: insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@partitioned6@part=2 +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).c EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=2).d SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__16 +PREHOOK: Output: default@partitioned6@part=1 +POSTHOOK: query: insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__16 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: 
Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).c EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned6 PARTITION(part=1).d SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select part,a,b,c,d from partitioned6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: -- DELETE where old column +delete from partitioned6 where a = 2 or a = 4 or a = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +PREHOOK: Output: default@partitioned6@part=1 +PREHOOK: Output: default@partitioned6@part=2 +POSTHOOK: query: -- DELETE where old column +delete from partitioned6 where a = 2 or a = 4 or a = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +POSTHOOK: Output: default@partitioned6@part=1 +POSTHOOK: Output: default@partitioned6@part=2 +row__id part +PREHOOK: query: select part,a,b,c,d from partitioned6 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned6 +PREHOOK: Input: default@partitioned6@part=1 +PREHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned6 +POSTHOOK: Input: default@partitioned6@part=1 +POSTHOOK: Input: default@partitioned6@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 3 original NULL NULL +1 5 new 100 hundred +2 1 new 10 ten +2 3 new 30 thirty +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned7 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DELETE where new column +--- +CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned7 +PREHOOK: query: insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__17 +PREHOOK: Output: default@partitioned7@part=1 +POSTHOOK: query: insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__17 +POSTHOOK: Output: default@partitioned7@part=1 +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).a EXPRESSION [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).b SIMPLE [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned7 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned7 +PREHOOK: Output: default@partitioned7 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned7 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Output: default@partitioned7 +PREHOOK: query: insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__18 +PREHOOK: Output: default@partitioned7@part=2 +POSTHOOK: query: insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__18 +POSTHOOK: Output: default@partitioned7@part=2 +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).a EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).b SIMPLE [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).c EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=2).d SIMPLE [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__19 +PREHOOK: Output: default@partitioned7@part=1 +POSTHOOK: query: insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__19 +POSTHOOK: Output: default@partitioned7@part=1 +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).a EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: 
Lineage: partitioned7 PARTITION(part=1).b SIMPLE [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).c EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned7 PARTITION(part=1).d SIMPLE [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select part,a,b,c,d from partitioned7 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned7 +PREHOOK: Input: default@partitioned7@part=1 +PREHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Input: default@partitioned7@part=1 +POSTHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: -- DELETE where new column +delete from partitioned7 where a = 1 or c = 30 or c == 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned7 +PREHOOK: Input: default@partitioned7@part=1 +PREHOOK: Input: default@partitioned7@part=2 +PREHOOK: Output: default@partitioned7@part=1 +PREHOOK: Output: default@partitioned7@part=2 +POSTHOOK: query: -- DELETE where new column +delete from partitioned7 where a = 1 or c = 30 or c == 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Input: default@partitioned7@part=1 +POSTHOOK: Input: default@partitioned7@part=2 +POSTHOOK: Output: default@partitioned7@part=1 +POSTHOOK: Output: default@partitioned7@part=2 +row__id part +PREHOOK: query: select part,a,b,c,d from partitioned7 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned7 +PREHOOK: Input: default@partitioned7@part=1 +PREHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned7 +POSTHOOK: Input: default@partitioned7@part=1 +POSTHOOK: Input: default@partitioned7@part=2 +#### A masked pattern was here #### +part a b c d +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 6 new 200 two hundred +2 2 new 20 twenty +2 4 new 40 forty +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@partitioned4
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: DROP TABLE partitioned4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Output: default@partitioned4
+PREHOOK: query: DROP TABLE partitioned5
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned5
+PREHOOK: Output: default@partitioned5
+POSTHOOK: query: DROP TABLE partitioned5
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned5
+POSTHOOK: Output: default@partitioned5
+PREHOOK: query: DROP TABLE partitioned6
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned6
+PREHOOK: Output: default@partitioned6
+POSTHOOK: query: DROP TABLE partitioned6
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned6
+POSTHOOK: Output: default@partitioned6
+PREHOOK: query: DROP TABLE partitioned7
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned7
+PREHOOK: Output: default@partitioned7
+POSTHOOK: query: DROP TABLE partitioned7
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned7
+POSTHOOK: Output: default@partitioned7
diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_acid_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_acid_mapwork_table.q.out
new file mode 100644
index 0000000..4885aeb
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/schema_evol_orc_acid_mapwork_table.q.out
@@ -0,0 +1,651 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Non-Vectorized, MapWork, Table
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
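
[Editor's note: illustration only, not part of the patch.] The golden output around this point exercises ALTER TABLE ADD COLUMNS on an unpartitioned ACID ORC table. A minimal hedged sketch of the same NULL-defaulting behaviour, assuming a stock Hive setup with the standard ACID session settings (the table name t is illustrative, not from the patch):

-- Standard ACID session settings (assumption: defaults otherwise).
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
CREATE TABLE t (a INT, b STRING)
  CLUSTERED BY (a) INTO 2 BUCKETS
  STORED AS ORC TBLPROPERTIES ('transactional'='true');
INSERT INTO TABLE t VALUES (1, 'original');
ALTER TABLE t ADD COLUMNS (c INT, d STRING);        -- metadata-only change
INSERT INTO TABLE t VALUES (2, 'new', 20, 'twenty');
-- Rows written before the ALTER carry no c/d data in their ORC files;
-- the reader pads them with NULL, so this should return:
--   1  original  NULL  NULL
--   2  new       20    twenty
SELECT a, b, c, d FROM t;
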
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
+PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b
+1 new
+1 original
+2 new
+2 original
+3 new
+3 original
+4 new
+4 original
+5 new
+6 new
+PREHOOK: query: select a,b,c from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b c
+1 new 10
+1 original NULL
+2 new 20
+2 original NULL
+3 new 30
+3 original NULL
+4 new 40
+4 original NULL
+5 new 100
+6 new 200
+PREHOOK: query: select a,b,c,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b c d
+1 new 10 ten
+1 original NULL NULL
+2 new 20 twenty
+2 original NULL NULL
+3 new 30
thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table5 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns +--- +CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table5 +PREHOOK: query: insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@table5 +POSTHOOK: query: insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@table5 +POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table5 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table5 +PREHOOK: Output: default@table5 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table5 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table5 +POSTHOOK: Output: default@table5 +PREHOOK: query: insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@table5 +POSTHOOK: query: insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@table5 +POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table5.c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table5.d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@table5 +POSTHOOK: query: insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@table5 +POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table5.c EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table5.d SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table5 +PREHOOK: type: QUERY +PREHOOK: Input: default@table5 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table5 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- UPDATE New Columns +update table5 set c=99 +PREHOOK: type: QUERY +PREHOOK: Input: default@table5 +PREHOOK: Output: default@table5 +POSTHOOK: query: -- UPDATE New Columns +update table5 set c=99 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table5 +POSTHOOK: Output: default@table5 +row__id a b _c3 d +PREHOOK: query: select a,b,c,d from table5 +PREHOOK: type: QUERY +PREHOOK: Input: default@table5 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table5 +#### A masked pattern was here #### +a b c d +1 new 99 ten +1 original 99 NULL +2 new 99 twenty +2 original 99 NULL +3 new 99 thirty +3 original 99 NULL +4 new 99 forty +4 original 99 NULL +5 new 99 hundred +6 new 99 two hundred +PREHOOK: query: 
-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table6 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column +--- +CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table6 +PREHOOK: query: insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@table6 +POSTHOOK: query: insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@table6 +POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table6.b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table6 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table6 +PREHOOK: Output: default@table6 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table6 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table6 +POSTHOOK: Output: default@table6 +PREHOOK: query: insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__11 +PREHOOK: Output: default@table6 +POSTHOOK: query: insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__11 +POSTHOOK: Output: default@table6 +POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table6.b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table6.c EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table6.d SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__12 +PREHOOK: Output: default@table6 +POSTHOOK: query: insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__12 +POSTHOOK: Output: default@table6 +POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table6.b SIMPLE 
[(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table6.c EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table6.d SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table6 +PREHOOK: type: QUERY +PREHOOK: Input: default@table6 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table6 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- DELETE where old column +delete from table6 where a = 2 or a = 4 or a = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@table6 +PREHOOK: Output: default@table6 +POSTHOOK: query: -- DELETE where old column +delete from table6 where a = 2 or a = 4 or a = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table6 +POSTHOOK: Output: default@table6 +row__id +PREHOOK: query: select a,b,c,d from table6 +PREHOOK: type: QUERY +PREHOOK: Input: default@table6 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table6 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +3 new 30 thirty +3 original NULL NULL +5 new 100 hundred +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table7 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column +--- +CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table7 +PREHOOK: query: insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__13 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__13 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table7 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table table7 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 +PREHOOK: query: insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table7.c EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table7.d SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table7.c EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table7.d SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table7 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: -- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 +row__id +PREHOOK: query: select a,b,c,d from table7 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +#### A masked pattern was here #### +a b c d +2 new 20 twenty +2 original NULL NULL +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +6 new 200 two hundred 
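
[Editor's note: illustration only, not part of the patch.] The table2 section above tests type widening rather than column addition: rows are written while a is a smallint, the column is then widened to int, and values outside the smallint range are inserted afterwards. A hedged standalone sketch of that path (the table name t2 is illustrative, not from the patch):

-- Standard ACID session settings (assumption: defaults otherwise).
set hive.support.concurrency=true;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
CREATE TABLE t2 (a SMALLINT, b STRING)
  CLUSTERED BY (a) INTO 2 BUCKETS
  STORED AS ORC TBLPROPERTIES ('transactional'='true');
INSERT INTO TABLE t2 VALUES (1000, 'original');   -- written with the smallint schema
ALTER TABLE t2 CHANGE COLUMN a a INT;             -- widen the reader type
INSERT INTO TABLE t2 VALUES (40000, 'new');       -- 40000 would overflow smallint
-- Schema evolution reads the old row through the widened int schema,
-- so both (1000, 'original') and (40000, 'new') should come back intact.
SELECT a, b FROM t2;
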
+PREHOOK: query: DROP TABLE table1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: DROP TABLE table1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
+PREHOOK: query: DROP TABLE table2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2
+POSTHOOK: query: DROP TABLE table2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2
+PREHOOK: query: DROP TABLE table5
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table5
+PREHOOK: Output: default@table5
+POSTHOOK: query: DROP TABLE table5
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table5
+POSTHOOK: Output: default@table5
+PREHOOK: query: DROP TABLE table6
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table6
+PREHOOK: Output: default@table6
+POSTHOOK: query: DROP TABLE table6
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table6
+POSTHOOK: Output: default@table6
+PREHOOK: query: DROP TABLE table7
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table7
+PREHOOK: Output: default@table7
+POSTHOOK: query: DROP TABLE table7
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table7
+POSTHOOK: Output: default@table7
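
[Editor's note: illustration only, not part of the patch.] The next golden file repeats the partitioned scenarios with the vectorized ACID reader, so the expected rows are identical to the non-vectorized variant: NULL padding for pre-ALTER rows has to behave the same in the vectorized batch path as in row mode. As a hedged assumption (the driving .q files are not shown in this hunk), the acidvec variant differs essentially by the vectorization switch:

-- Assumed difference in the driving .q file for the acidvec variant:
set hive.vectorized.execution.enabled=true;
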
diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_acidvec_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_acidvec_mapwork_part.q.out
new file mode 100644
index 0000000..c5af165
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/schema_evol_orc_acidvec_mapwork_part.q.out
@@ -0,0 +1,1037 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Partitioned
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Partitioned
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@partitioned1@part=1
+POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@partitioned1@part=1
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned1
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@partitioned1@part=2
+POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@partitioned1@part=2
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@partitioned1@part=1
+POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@partitioned1@part=1
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1
PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@partitioned2
+POSTHOOK: Output: default@partitioned2
+PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@partitioned2@part=2
+POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@partitioned2@part=2
+POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__6
+PREHOOK: Output: default@partitioned2@part=1
+POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__6
+POSTHOOK: Output: default@partitioned2@part=1
+POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: select part,a,b from partitioned2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned2
+PREHOOK: Input: default@partitioned2@part=1
+PREHOOK: Input: default@partitioned2@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b from partitioned2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned2
+POSTHOOK: Input: default@partitioned2@part=1
+POSTHOOK: Input: default@partitioned2@part=2
+#### A masked pattern was here ####
+part a b
+1 1000 original
+1 3 original
+1 4 original
+1 5000 new
+1 6737 original
+1 90000 new
+2 200 new
+2 32768 new
+2 40000 new
+2 72909 new
+PREHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned3
+PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__7
+PREHOOK: Output: default@partitioned3@part=1
+POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__7
+POSTHOOK: Output: default@partitioned3@part=1
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned3
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@partitioned3
+POSTHOOK: Output: default@partitioned3
+PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+ (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__8
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+ (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__8
+POSTHOOK: Output: default@partitioned3@part=1
+POSTHOOK: Output: default@partitioned3@part=2
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3 _col4
+PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b
+1 1 original
+1 2 original
+1 3 original
+1 4 original
+1 5 new
+1 6 new
+2 1 new
+2 2 new
+2 3 new
+2 4 new
+PREHOOK: query: select part,a,b,c from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b c
+1 1 original NULL
+1 2 original NULL
+1 3 original NULL
+1 4 original NULL
+1 5 new 100
+1 6 new 200
+2 1 new 10
+2 2 new 20
+2 3 new 30
+2 4 new 40
+PREHOOK: query: select part,a,b,c,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b c d
+1 1 original NULL NULL
+1 2 original NULL NULL
+1 3 original NULL NULL
+1 4 original NULL NULL
+1 5 new 100 hundred
+1 6 new 200 two hundred
+2 1 new 10 ten
+2 2 new 20 twenty
+2 3 new 30 thirty
+2 4 new 40 forty
+PREHOOK: query: select part,a,c,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,c,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a c d
+1 1 NULL NULL
+1 2 NULL NULL
+1 3 NULL NULL
+1 4 NULL NULL
+1 5 100 hundred
+1 6 200 two hundred
+2 1 10 ten
+2 2 20 twenty
+2 3 30 thirty
+2 4 40 forty
+PREHOOK: query: select part,a,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a d
+1 1 NULL
+1 2 NULL
+1 3 NULL
+1 4 NULL
+1 5 hundred
+1 6 two hundred
+2 1 ten
+2 2 twenty
+2 3 thirty
+2 4 forty
+PREHOOK: query: select part,c from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,c from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part c
+1 100
+1 200
+1 NULL
+1 NULL
+1 NULL
+1 NULL
+2 10
+2 20
+2 30
+2 40
+PREHOOK: query: select part,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part d
+1 NULL
+1 NULL
+1 NULL
+1 NULL
+1 hundred
+1 two hundred
+2 forty
+2 ten
+2 thirty
+2 twenty
+PREHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned4
+PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__9
+PREHOOK: Output: default@partitioned4@part=1
+POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__9
+POSTHOOK: Output: default@partitioned4@part=1
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@partitioned4
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Output: default@partitioned4
+PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+ (5000, 'new', 1),(90000, 'new', 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__10
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+ (5000, 'new', 1),(90000, 'new', 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__10
+POSTHOOK: Output: default@partitioned4@part=1
+POSTHOOK: Output: default@partitioned4@part=2
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1 _col2
+PREHOOK: query: select part,a,b from partitioned4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned4
+PREHOOK: Input: default@partitioned4@part=1
+PREHOOK: Input: default@partitioned4@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b from partitioned4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Input: default@partitioned4@part=1
+POSTHOOK: Input: default@partitioned4@part=2
+#### A masked pattern was here ####
+part a b
+1 1000 original
+1 3 original
+1 4 original
+1 5000 new
+1 6737 original
+1 90000 new
+2 200 new
+2 32768 new
+2 40000 new
+2 72909 new
+PREHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns
+---
+CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned5
+POSTHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns
+---
+CREATE TABLE partitioned5(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned5
+PREHOOK: query: insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__11
+PREHOOK: Output: default@partitioned5@part=1
+POSTHOOK: query: insert into table partitioned5 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__11
+POSTHOOK: Output: default@partitioned5@part=1
+POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned5 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned5
+PREHOOK: Output: default@partitioned5
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned5 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@partitioned5
+POSTHOOK: Output: default@partitioned5
+PREHOOK: query: insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__12
+PREHOOK: Output: default@partitioned5@part=2
+POSTHOOK: query: insert into table partitioned5 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__12
+POSTHOOK: Output: default@partitioned5@part=2
+POSTHOOK: Lineage: partitioned5 PARTITION(part=2).a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned5 PARTITION(part=2).b SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned5 PARTITION(part=2).c EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned5 PARTITION(part=2).d SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__13
+PREHOOK: Output: default@partitioned5@part=1
+POSTHOOK: query: insert into table partitioned5 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__13
+POSTHOOK: Output: default@partitioned5@part=1
+POSTHOOK: Lineage: partitioned5 PARTITION(part=1).a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned5 PARTITION(part=1).b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned5 PARTITION(part=1).c EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned5 PARTITION(part=1).d SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: select part,a,b,c,d from partitioned5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned5
+PREHOOK: Input: default@partitioned5@part=1
+PREHOOK: Input: default@partitioned5@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned5
+POSTHOOK: Input: default@partitioned5@part=1
+POSTHOOK: Input: default@partitioned5@part=2
+#### A masked pattern was here ####
+part a b c d
+1 1 original NULL NULL
+1 2 original NULL NULL
+1 3 original NULL NULL
+1 4 original NULL NULL
+1 5 new 100 hundred
+1 6 new 200 two hundred
+2 1 new 10 ten
+2 2 new 20 twenty
+2 3 new 30 thirty
+2 4 new 40 forty
+PREHOOK: query: -- UPDATE New Columns
+update partitioned5 set c=99
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned5
+PREHOOK: Input: default@partitioned5@part=1
+PREHOOK: Input: default@partitioned5@part=2
+PREHOOK: Output: default@partitioned5@part=1
+PREHOOK: Output: default@partitioned5@part=2
+POSTHOOK: query: -- UPDATE New Columns
+update partitioned5 set c=99
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned5
+POSTHOOK: Input: default@partitioned5@part=1
+POSTHOOK: Input: default@partitioned5@part=2
+POSTHOOK: Output: default@partitioned5@part=1
+POSTHOOK: Output: default@partitioned5@part=2
+row__id a b _c3 d part
+PREHOOK: query: select part,a,b,c,d from partitioned5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned5
+PREHOOK: Input: default@partitioned5@part=1
+PREHOOK: Input: default@partitioned5@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned5
+POSTHOOK: Input: default@partitioned5@part=1
+POSTHOOK: Input: default@partitioned5@part=2
+#### A masked pattern was here ####
+part a b c d
+1 1 original 99 NULL
+1 2 original 99 NULL
+1 3 original 99 NULL
+1 4 original 99 NULL
+1 5 new 99 hundred
+1 6 new 99 two hundred
+2 1 new 99 ten
+2 2 new 99 twenty
+2 3 new 99 thirty
+2 4 new 99 forty
+PREHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column
+---
+CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned6
+POSTHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column
+---
+CREATE TABLE partitioned6(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned6
+PREHOOK: query: insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__14
+PREHOOK: Output: default@partitioned6@part=1
+POSTHOOK: query: insert into table partitioned6 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__14
+POSTHOOK: Output: default@partitioned6@part=1
+POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned6 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned6
+PREHOOK: Output: default@partitioned6
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned6 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@partitioned6
+POSTHOOK: Output: default@partitioned6
+PREHOOK: query: insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__15
+PREHOOK: Output: default@partitioned6@part=2
+POSTHOOK: query: insert into table partitioned6 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__15
+POSTHOOK: Output: default@partitioned6@part=2
+POSTHOOK: Lineage: partitioned6 PARTITION(part=2).a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned6 PARTITION(part=2).b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned6 PARTITION(part=2).c EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned6 PARTITION(part=2).d SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__16
+PREHOOK: Output: default@partitioned6@part=1
+POSTHOOK: query: insert into table partitioned6 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__16
+POSTHOOK: Output: default@partitioned6@part=1
+POSTHOOK: Lineage: partitioned6 PARTITION(part=1).a EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned6 PARTITION(part=1).b SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned6 PARTITION(part=1).c EXPRESSION [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned6 PARTITION(part=1).d SIMPLE [(values__tmp__table__16)values__tmp__table__16.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: select part,a,b,c,d from partitioned6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned6
+PREHOOK: Input: default@partitioned6@part=1
+PREHOOK: Input: default@partitioned6@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned6
+POSTHOOK: Input: default@partitioned6@part=1
+POSTHOOK: Input: default@partitioned6@part=2
+#### A masked pattern was here ####
+part a b c d
+1 1 original NULL NULL
+1 2 original NULL NULL
+1 3 original NULL NULL
+1 4 original NULL NULL
+1 5 new 100 hundred
+1 6 new 200 two hundred
+2 1 new 10 ten
+2 2 new 20 twenty
+2 3 new 30 thirty
+2 4 new 40 forty
+PREHOOK: query: -- DELETE where old column
+delete from partitioned6 where a = 2 or a = 4 or a = 6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned6
+PREHOOK: Input: default@partitioned6@part=1
+PREHOOK: Input: default@partitioned6@part=2
+PREHOOK: Output: default@partitioned6@part=1
+PREHOOK: Output: default@partitioned6@part=2
+POSTHOOK: query: -- DELETE where old column
+delete from partitioned6 where a = 2 or a = 4 or a = 6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned6
+POSTHOOK: Input: default@partitioned6@part=1
+POSTHOOK: Input: default@partitioned6@part=2
+POSTHOOK: Output: default@partitioned6@part=1
+POSTHOOK: Output: default@partitioned6@part=2
+row__id part
+PREHOOK: query: select part,a,b,c,d from partitioned6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned6
+PREHOOK: Input: default@partitioned6@part=1
+PREHOOK: Input: default@partitioned6@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned6
+POSTHOOK: Input: default@partitioned6@part=1
+POSTHOOK: Input: default@partitioned6@part=2
+#### A masked pattern was here ####
+part a b c d
+1 1 original NULL NULL
+1 3 original NULL NULL
+1 5 new 100 hundred
+2 1 new 10 ten
+2 3 new 30 thirty
+PREHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column
+---
+CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned7
+POSTHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column
+---
+CREATE TABLE partitioned7(a INT, b STRING) PARTITIONED BY(part INT) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned7
+PREHOOK: query: insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__17
+PREHOOK: Output: default@partitioned7@part=1
+POSTHOOK: query: insert into table partitioned7 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__17
+POSTHOOK: Output: default@partitioned7@part=1
+POSTHOOK: Lineage: partitioned7 PARTITION(part=1).a EXPRESSION [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned7 PARTITION(part=1).b SIMPLE [(values__tmp__table__17)values__tmp__table__17.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned7 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned7
+PREHOOK: Output: default@partitioned7
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned7 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@partitioned7
+POSTHOOK: Output: default@partitioned7
+PREHOOK: query: insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__18
+PREHOOK: Output: default@partitioned7@part=2
+POSTHOOK: query: insert into table partitioned7 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__18
+POSTHOOK: Output: default@partitioned7@part=2
+POSTHOOK: Lineage: partitioned7 PARTITION(part=2).a EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned7 PARTITION(part=2).b SIMPLE [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned7 PARTITION(part=2).c EXPRESSION [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned7 PARTITION(part=2).d SIMPLE [(values__tmp__table__18)values__tmp__table__18.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__19
+PREHOOK: Output: default@partitioned7@part=1
+POSTHOOK: query: insert into table partitioned7 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__19
+POSTHOOK: Output: default@partitioned7@part=1
+POSTHOOK: Lineage: partitioned7 PARTITION(part=1).a EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned7 PARTITION(part=1).b SIMPLE [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned7 PARTITION(part=1).c EXPRESSION [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned7 PARTITION(part=1).d SIMPLE [(values__tmp__table__19)values__tmp__table__19.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: select part,a,b,c,d from partitioned7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned7
+PREHOOK: Input: default@partitioned7@part=1
+PREHOOK: Input: default@partitioned7@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned7
+POSTHOOK: Input: default@partitioned7@part=1
+POSTHOOK: Input: default@partitioned7@part=2
+#### A masked pattern was here ####
+part a b c d
+1 1 original NULL NULL
+1 2 original NULL NULL
+1 3 original NULL NULL
+1 4 original NULL NULL
+1 5 new 100 hundred
+1 6 new 200 two hundred
+2 1 new 10 ten
+2 2 new 20 twenty
+2 3 new 30 thirty
+2 4 new 40 forty
+PREHOOK: query: -- DELETE where new column
+delete from partitioned7 where a = 1 or c = 30 or c == 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned7
+PREHOOK: Input: default@partitioned7@part=1
+PREHOOK: Input: default@partitioned7@part=2
+PREHOOK: Output: default@partitioned7@part=1
+PREHOOK: Output: default@partitioned7@part=2
+POSTHOOK: query: -- DELETE where new column
+delete from partitioned7 where a = 1 or c = 30 or c == 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned7
+POSTHOOK: Input: default@partitioned7@part=1
+POSTHOOK: Input: default@partitioned7@part=2
+POSTHOOK: Output: default@partitioned7@part=1
+POSTHOOK: Output: default@partitioned7@part=2
+row__id part
+PREHOOK: query: select part,a,b,c,d from partitioned7
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned7
+PREHOOK: Input: default@partitioned7@part=1
+PREHOOK: Input: default@partitioned7@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned7
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned7
+POSTHOOK: Input: default@partitioned7@part=1
+POSTHOOK: Input: default@partitioned7@part=2
+#### A masked pattern was here ####
+part a b c d
+1 2 original NULL NULL
+1 3 original NULL NULL
+1 4 original NULL NULL
+1 6 new 200 two hundred
+2 2 new 20 twenty
+2 4 new 40 forty
+PREHOOK: query: DROP TABLE partitioned1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned1
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: DROP TABLE partitioned1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: DROP TABLE partitioned2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned2
+PREHOOK: Output: default@partitioned2
+POSTHOOK: query: DROP TABLE partitioned2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned2
+POSTHOOK: Output: default@partitioned2
+PREHOOK: query: DROP TABLE partitioned3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned3
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: DROP TABLE partitioned3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned3
+POSTHOOK: Output: default@partitioned3
+PREHOOK: query: DROP TABLE partitioned4
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned4
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: DROP TABLE partitioned4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Output: default@partitioned4
+PREHOOK: query: DROP TABLE partitioned5
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned5
+PREHOOK: Output: default@partitioned5
+POSTHOOK: query: DROP TABLE partitioned5
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned5
+POSTHOOK: Output: default@partitioned5
+PREHOOK: query: DROP TABLE partitioned6
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned6
+PREHOOK: Output: default@partitioned6
+POSTHOOK: query: DROP TABLE partitioned6
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned6
+POSTHOOK: Output: default@partitioned6
+PREHOOK: query: DROP TABLE partitioned7
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned7
+PREHOOK: Output: default@partitioned7
+POSTHOOK: query: DROP TABLE partitioned7
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned7
+POSTHOOK: Output: default@partitioned7
diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_acidvec_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_acidvec_mapwork_table.q.out
new file mode 100644
index 0000000..2b1e5c3
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/schema_evol_orc_acidvec_mapwork_table.q.out
@@ -0,0 +1,651 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Table
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, ACID Vectorized, MapWork, Table
+-- *IMPORTANT NOTE* We set hive.exec.schema.evolution=false above since schema evolution is always used for ACID.
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
+PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b
+1 new
+1 original
+2 new
+2 original
+3 new
+3 original
+4 new
+4 original
+5 new
+6 new
+PREHOOK: query: select a,b,c from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b c
+1 new 10
+1 original NULL
+2 new 20
+2 original NULL
+3 new 30
+3 original NULL
+4 new 40
+4 original NULL
+5 new 100
+6 new 200
+PREHOOK: query: select a,b,c,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b c d
+1 new 10 ten
+1 original NULL NULL
+2 new 20 twenty
+2 original NULL NULL
+3 new 30 thirty
+3 original NULL NULL
+4 new 40 forty
+4 original NULL NULL
+5 new 100 hundred
+6 new 200 two hundred
+PREHOOK: query: select a,c,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,c,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a c d
+1 10 ten
+1 NULL NULL
+2 20 twenty
+2 NULL NULL
+3 30 thirty
+3 NULL NULL
+4 40 forty
+4 NULL NULL
+5 100 hundred
+6 200 two hundred
+PREHOOK: query: select a,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a d
+1 NULL
+1 ten
+2 NULL
+2 twenty
+3 NULL
+3 thirty
+4 NULL
+4 forty
+5 hundred
+6 two hundred
+PREHOOK: query: select c from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select c from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+c
+10
+100
+20
+200
+30
+40
+NULL
+NULL
+NULL
+NULL
+PREHOOK: query: select d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+d
+NULL
+NULL
+NULL
+NULL
+forty
+hundred
+ten
+thirty
+twenty
+two hundred
+PREHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table2
+POSTHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table2(a smallint, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table2
+PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@table2
+POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table2 change column a a int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2
+POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table2 change column a a int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2
+PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@table2
+POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__6
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__6
+POSTHOOK: Output: default@table2
+POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: select a,b from table2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b from table2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+#### A masked pattern was here ####
+a b
+1000 original
+200 new
+3 original
+32768 new
+4 original
+40000 new
+5000 new
+6737 original
+72909 new
+90000 new
+PREHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns
+---
+CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table5
+POSTHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... UPDATE New Columns
+---
+CREATE TABLE table5(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table5
+PREHOOK: query: insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__7
+PREHOOK: Output: default@table5
+POSTHOOK: query: insert into table table5 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__7
+POSTHOOK: Output: default@table5
+POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table5 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@table5
+PREHOOK: Output: default@table5
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table5 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@table5
+POSTHOOK: Output: default@table5
+PREHOOK: query: insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__8
+PREHOOK: Output: default@table5
+POSTHOOK: query: insert into table table5 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__8
+POSTHOOK: Output: default@table5
+POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table5.c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table5.d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__9
+PREHOOK: Output: default@table5
+POSTHOOK: query: insert into table table5 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__9
+POSTHOOK: Output: default@table5
+POSTHOOK: Lineage: table5.a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table5.b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table5.c EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table5.d SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: select a,b,c,d from table5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table5
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c,d from table5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table5
+#### A masked pattern was here ####
+a b c d
+1 new 10 ten
+1 original NULL NULL
+2 new 20 twenty
+2 original NULL NULL
+3 new 30 thirty
+3 original NULL NULL
+4 new 40 forty
+4 original NULL NULL
+5 new 100 hundred
+6 new 200 two hundred
+PREHOOK: query: -- UPDATE New Columns
+update table5 set c=99
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table5
+PREHOOK: Output: default@table5
+POSTHOOK: query: -- UPDATE New Columns
+update table5 set c=99
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table5
+POSTHOOK: Output: default@table5
+row__id a b _c3 d
+PREHOOK: query: select a,b,c,d from table5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table5
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c,d from table5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table5
+#### A masked pattern was here ####
+a b c d
+1 new 99 ten
+1 original 99 NULL
+2 new 99 twenty
+2 original 99 NULL
+3 new 99 thirty
+3 original 99 NULL
+4 new 99 forty
+4 original 99 NULL
+5 new 99 hundred
+6 new 99 two hundred
+PREHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column
+---
+CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table6
+POSTHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where old column
+---
+CREATE TABLE table6(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table6
+PREHOOK: query: insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__10
+PREHOOK: Output: default@table6
+POSTHOOK: query: insert into table table6 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__10
+POSTHOOK: Output: default@table6
+POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table6.b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table6 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@table6
+PREHOOK: Output: default@table6
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table6 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@table6
+POSTHOOK: Output: default@table6
+PREHOOK: query: insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__11
+PREHOOK: Output: default@table6
+POSTHOOK: query: insert into table table6 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__11
+POSTHOOK: Output: default@table6
+POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table6.b SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table6.c EXPRESSION [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table6.d SIMPLE [(values__tmp__table__11)values__tmp__table__11.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__12
+PREHOOK: Output: default@table6
+POSTHOOK: query: insert into table table6 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__12
+POSTHOOK: Output: default@table6
+POSTHOOK: Lineage: table6.a EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table6.b SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table6.c EXPRESSION [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table6.d SIMPLE [(values__tmp__table__12)values__tmp__table__12.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: select a,b,c,d from table6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table6
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c,d from table6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table6
+#### A masked pattern was here ####
+a b c d
+1 new 10 ten
+1 original NULL NULL
+2 new 20 twenty
+2 original NULL NULL
+3 new 30 thirty
+3 original NULL NULL
+4 new 40 forty
+4 original NULL NULL
+5 new 100 hundred
+6 new 200 two hundred
+PREHOOK: query: -- DELETE where old column
+delete from table6 where a = 2 or a = 4 or a = 6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table6
+PREHOOK: Output: default@table6
+POSTHOOK: query: -- DELETE where old column
+delete from table6 where a = 2 or a = 4 or a = 6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table6
+POSTHOOK: Output: default@table6
+row__id
+PREHOOK: query: select a,b,c,d from table6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table6
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c,d from table6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table6
+#### A masked pattern was here ####
+a b c d
+1 new 10 ten
+1 original NULL NULL
+3 new 30 thirty
+3 original NULL NULL
+5 new 100 hundred
+PREHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column
+---
+CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table7
+POSTHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DELETE where new column
+---
+CREATE TABLE table7(a INT, b STRING) clustered by (a) into 2 buckets STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table7
+PREHOOK: query: insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__13
+PREHOOK: Output: default@table7
+POSTHOOK: query: insert into table table7 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__13
+POSTHOOK: Output: default@table7
+POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__13)values__tmp__table__13.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table7 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@table7
+PREHOOK: Output: default@table7
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table7 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 +PREHOOK: query: insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__14 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__14 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table7.c EXPRESSION [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table7.d SIMPLE [(values__tmp__table__14)values__tmp__table__14.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__15 +PREHOOK: Output: default@table7 +POSTHOOK: query: insert into table table7 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__15 +POSTHOOK: Output: default@table7 +POSTHOOK: Lineage: table7.a EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table7.b SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table7.c EXPRESSION [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table7.d SIMPLE [(values__tmp__table__15)values__tmp__table__15.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table7 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: -- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: -- DELETE where new column +delete from table7 where a = 1 or c = 30 or c == 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 +row__id +PREHOOK: query: select a,b,c,d from table7 +PREHOOK: type: QUERY +PREHOOK: Input: default@table7 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table7 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table7 +#### A masked pattern was here #### +a b c d +2 new 20 twenty +2 original NULL NULL +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +6 new 200 two hundred 
+PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: DROP TABLE table5 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table5 +PREHOOK: Output: default@table5 +POSTHOOK: query: DROP TABLE table5 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table5 +POSTHOOK: Output: default@table5 +PREHOOK: query: DROP TABLE table6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table6 +PREHOOK: Output: default@table6 +POSTHOOK: query: DROP TABLE table6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table6 +POSTHOOK: Output: default@table6 +PREHOOK: query: DROP TABLE table7 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table7 +PREHOOK: Output: default@table7 +POSTHOOK: query: DROP TABLE table7 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table7 +POSTHOOK: Output: default@table7 diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_fetchwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_fetchwork_part.q.out new file mode 100644 index 0000000..dc2fd9a --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_fetchwork_part.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: 
default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 
ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 
+PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_fetchwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_fetchwork_table.q.out new file mode 100644 index 0000000..93f4dcc --- /dev/null +++ 
ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_fetchwork_table.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: 
Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_mapwork_part.q.out new file mode 100644 index 0000000..dc2fd9a --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_mapwork_part.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: ORC, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: 
default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new 100 +1 6 new 200 +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new 100 hundred +1 6 new 200 two hundred +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 100 hundred +1 6 200 two hundred +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 hundred +1 6 two hundred +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 100 +1 200 +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 hundred +1 two hundred +2 forty +2 
ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 90000 new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned3
+PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__7
+PREHOOK: Output: default@partitioned3@part=1
+POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__7
+POSTHOOK: Output: default@partitioned3@part=1
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned3
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@partitioned3
+POSTHOOK: Output: default@partitioned3
+PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+ (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__8
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+ (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__8
+POSTHOOK: Output: default@partitioned3@part=1
+POSTHOOK: Output: default@partitioned3@part=2
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3 _col4
+PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b
+1 1 original
+1 2 original
+1 3 original
+1 4 original
+1 5 new
+1 6 new
+2 1 new
+2 2 new
+2 3 new
+2 4 new
+PREHOOK: query: select part,a,b,c from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b c
+1 1 original NULL
+1 2 original NULL
+1 3 original NULL
+1 4 original NULL
+1 5 new 100
+1 6 new 200
+2 1 new 10
+2 2 new 20
+2 3 new 30
+2 4 new 40
+PREHOOK: query: select part,a,b,c,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b c d
+1 1 original NULL NULL
+1 2 original NULL NULL
+1 3 original NULL NULL
+1 4 original NULL NULL
+1 5 new 100 hundred
+1 6 new 200 two hundred
+2 1 new 10 ten
+2 2 new 20 twenty
+2 3 new 30 thirty
+2 4 new 40 forty
+PREHOOK: query: select part,a,c,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,c,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a c d
+1 1 NULL NULL
+1 2 NULL NULL
+1 3 NULL NULL
+1 4 NULL NULL
+1 5 100 hundred
+1 6 200 two hundred
+2 1 10 ten
+2 2 20 twenty
+2 3 30 thirty
+2 4 40 forty
+PREHOOK: query: select part,a,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a d
+1 1 NULL
+1 2 NULL
+1 3 NULL
+1 4 NULL
+1 5 hundred
+1 6 two hundred
+2 1 ten
+2 2 twenty
+2 3 thirty
+2 4 forty
+PREHOOK: query: select part,c from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,c from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part c
+1 100
+1 200
+1 NULL
+1 NULL
+1 NULL
+1 NULL
+2 10
+2 20
+2 30
+2 40
+PREHOOK: query: select part,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part d
+1 NULL
+1 NULL
+1 NULL
+1 NULL
+1 hundred
+1 two hundred
+2 forty
+2 ten
+2 thirty
+2 twenty
+PREHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned4
+PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__9
+PREHOOK: Output: default@partitioned4@part=1
+POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__9
+POSTHOOK: Output: default@partitioned4@part=1
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@partitioned4
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Output: default@partitioned4
+PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+ (5000, 'new', 1),(90000, 'new', 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__10
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+ (5000, 'new', 1),(90000, 'new', 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__10
+POSTHOOK: Output: default@partitioned4@part=1
+POSTHOOK: Output: default@partitioned4@part=2
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1 _col2
+PREHOOK: query: select part,a,b from partitioned4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned4
+PREHOOK: Input: default@partitioned4@part=1
+PREHOOK: Input: default@partitioned4@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b from partitioned4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Input: default@partitioned4@part=1
+POSTHOOK: Input: default@partitioned4@part=2
+#### A masked pattern was here ####
+part a b
+1 1000 original
+1 3 original
+1 4 original
+1 5000 new
+1 6737 original
+1 90000 new
+2 200 new
+2 32768 new
+2 40000 new
+2 72909 new
+PREHOOK: query: DROP TABLE partitioned1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned1
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: DROP TABLE partitioned1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: DROP TABLE partitioned2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned2
+PREHOOK: Output: default@partitioned2
+POSTHOOK: query: DROP TABLE partitioned2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned2
+POSTHOOK: Output: default@partitioned2
+PREHOOK: query: DROP TABLE partitioned3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned3
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: DROP TABLE partitioned3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned3
+POSTHOOK: Output: default@partitioned3
+PREHOOK: query: DROP TABLE partitioned4
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned4
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: DROP TABLE partitioned4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Output: default@partitioned4
diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_mapwork_table.q.out
new file mode 100644
index 0000000..df9494f
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/schema_evol_orc_nonvec_mapwork_table.q.out
@@ -0,0 +1,298 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Non-Vectorized, MapWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
+PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b
+1 new
+1 original
+2 new
+2 original
+3 new
+3 original
+4 new
+4 original
+5 new
+6 new
+PREHOOK: query: select a,b,c from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b c
+1 new 10
+1 original NULL
+2 new 20
+2 original NULL
+3 new 30
+3 original NULL
+4 new 40
+4 original NULL
+5 new 100
+6 new 200
+PREHOOK: query: select a,b,c,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b c d
+1 new 10 ten
+1 original NULL NULL
+2 new 20 twenty
+2 original NULL NULL
+3 new 30 thirty
+3 original NULL NULL
+4 new 40 forty
+4 original NULL NULL
+5 new 100 hundred
+6 new 200 two hundred
+PREHOOK: query: select a,c,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,c,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a c d
+1 10 ten
+1 NULL NULL
+2 20 twenty
+2 NULL NULL
+3 30 thirty
+3 NULL NULL
+4 40 forty
+4 NULL NULL
+5 100 hundred
+6 200 two hundred
+PREHOOK: query: select a,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a d
+1 NULL
+1 ten
+2 NULL
+2 twenty
+3 NULL
+3 thirty
+4 NULL
+4 forty
+5 hundred
+6 two hundred
+PREHOOK: query: select c from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select c from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+c
+10
+100
+20
+200
+30
+40
+NULL
+NULL
+NULL
+NULL
+PREHOOK: query: select d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+d
+NULL
+NULL
+NULL
+NULL
+forty
+hundred
+ten
+thirty
+twenty
+two hundred
+PREHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table2(a smallint, b STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table2
+POSTHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE table2(a smallint, b STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table2
+PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@table2
+POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table2 change column a a int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2
+POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table table2 change column a a int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2
+PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@table2
+POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__6
+PREHOOK: Output: default@table2
+POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__6
+POSTHOOK: Output: default@table2
+POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: select a,b from table2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b from table2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+#### A masked pattern was here ####
+a b
+1000 original
+200 new
+3 original
+32768 new
+4 original
+40000 new
+5000 new
+6737 original
+72909 new
+90000 new
+PREHOOK: query: DROP TABLE table1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: DROP TABLE table1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
+PREHOOK: query: DROP TABLE table2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@table2
+PREHOOK: Output: default@table2
+POSTHOOK: query: DROP TABLE table2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@table2
+POSTHOOK: Output: default@table2
diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_vec_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_vec_mapwork_part.q.out
new file mode 100644
index 0000000..9f1fda8
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/schema_evol_orc_vec_mapwork_part.q.out
@@ -0,0 +1,642 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Vectorized, MapWork, Partitioned
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Vectorized, MapWork, Partitioned
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@partitioned1@part=1
+POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@partitioned1@part=1
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned1
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned1 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@partitioned1@part=2
+POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@partitioned1@part=2
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@partitioned1@part=1
+POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@partitioned1@part=1
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b
+1 1 original
+1 2 original
+1 3 original
+1 4 original
+1 5 new
+1 6 new
+2 1 new
+2 2 new
+2 3 new
+2 4 new
+PREHOOK: query: select part,a,b,c from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b c
+1 1 original NULL
+1 2 original NULL
+1 3 original NULL
+1 4 original NULL
+1 5 new 100
+1 6 new 200
+2 1 new 10
+2 2 new 20
+2 3 new 30
+2 4 new 40
+PREHOOK: query: select part,a,b,c,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b c d
+1 1 original NULL NULL
+1 2 original NULL NULL
+1 3 original NULL NULL
+1 4 original NULL NULL
+1 5 new 100 hundred
+1 6 new 200 two hundred
+2 1 new 10 ten
+2 2 new 20 twenty
+2 3 new 30 thirty
+2 4 new 40 forty
+PREHOOK: query: select part,a,c,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,c,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a c d
+1 1 NULL NULL
+1 2 NULL NULL
+1 3 NULL NULL
+1 4 NULL NULL
+1 5 100 hundred
+1 6 200 two hundred
+2 1 10 ten
+2 2 20 twenty
+2 3 30 thirty
+2 4 40 forty
+PREHOOK: query: select part,a,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a d
+1 1 NULL
+1 2 NULL
+1 3 NULL
+1 4 NULL
+1 5 hundred
+1 6 two hundred
+2 1 ten
+2 2 twenty
+2 3 thirty
+2 4 forty
+PREHOOK: query: select part,c from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,c from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part c
+1 100
+1 200
+1 NULL
+1 NULL
+1 NULL
+1 NULL
+2 10
+2 20
+2 30
+2 40
+PREHOOK: query: select part,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part d
+1 NULL
+1 NULL
+1 NULL
+1 NULL
+1 hundred
+1 two hundred
+2 forty
+2 ten
+2 thirty
+2 twenty
+PREHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned2
+POSTHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned2
+PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__4
+PREHOOK: Output: default@partitioned2@part=1
+POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__4
+POSTHOOK: Output: default@partitioned2@part=1
+POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned2 change column a a int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@partitioned2
+PREHOOK: Output: default@partitioned2
+POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned2 change column a a int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@partitioned2
+POSTHOOK: Output: default@partitioned2
+PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__5
+PREHOOK: Output: default@partitioned2@part=2
+POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__5
+POSTHOOK: Output: default@partitioned2@part=2
+POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__6
+PREHOOK: Output: default@partitioned2@part=1
+POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__6
+POSTHOOK: Output: default@partitioned2@part=1
+POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: select part,a,b from partitioned2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned2
+PREHOOK: Input: default@partitioned2@part=1
+PREHOOK: Input: default@partitioned2@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b from partitioned2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned2
+POSTHOOK: Input: default@partitioned2@part=1
+POSTHOOK: Input: default@partitioned2@part=2
+#### A masked pattern was here ####
+part a b
+1 1000 original
+1 3 original
+1 4 original
+1 5000 new
+1 6737 original
+1 90000 new
+2 200 new
+2 32768 new
+2 40000 new
+2 72909 new
+PREHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: --
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT
+---
+CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned3
+PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__7
+PREHOOK: Output: default@partitioned3@part=1
+POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__7
+POSTHOOK: Output: default@partitioned3@part=1
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@partitioned3
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table partitioned3 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@partitioned3
+POSTHOOK: Output: default@partitioned3
+PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+ (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__8
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2),
+ (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__8
+POSTHOOK: Output: default@partitioned3@part=1
+POSTHOOK: Output: default@partitioned3@part=2
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3 _col4
+PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select part,a,b from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b
+1 1 original
+1 2 original
+1 3 original
+1 4 original
+1 5 new
+1 6 new
+2 1 new
+2 2 new
+2 3 new
+2 4 new
+PREHOOK: query: select part,a,b,c from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b c
+1 1 original NULL
+1 2 original NULL
+1 3 original NULL
+1 4 original NULL
+1 5 new 100
+1 6 new 200
+2 1 new 10
+2 2 new 20
+2 3 new 30
+2 4 new 40
+PREHOOK: query: select part,a,b,c,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b,c,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a b c d
+1 1 original NULL NULL
+1 2 original NULL NULL
+1 3 original NULL NULL
+1 4 original NULL NULL
+1 5 new 100 hundred
+1 6 new 200 two hundred
+2 1 new 10 ten
+2 2 new 20 twenty
+2 3 new 30 thirty
+2 4 new 40 forty
+PREHOOK: query: select part,a,c,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,c,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a c d
+1 1 NULL NULL
+1 2 NULL NULL
+1 3 NULL NULL
+1 4 NULL NULL
+1 5 100 hundred
+1 6 200 two hundred
+2 1 10 ten
+2 2 20 twenty
+2 3 30 thirty
+2 4 40 forty
+PREHOOK: query: select part,a,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part a d
+1 1 NULL
+1 2 NULL
+1 3 NULL
+1 4 NULL
+1 5 hundred
+1 6 two hundred
+2 1 ten
+2 2 twenty
+2 3 thirty
+2 4 forty
+PREHOOK: query: select part,c from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,c from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part c
+1 100
+1 200
+1 NULL
+1 NULL
+1 NULL
+1 NULL
+2 10
+2 20
+2 30
+2 40
+PREHOOK: query: select part,d from partitioned1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned1
+PREHOOK: Input: default@partitioned1@part=1
+PREHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,d from partitioned1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Input: default@partitioned1@part=1
+POSTHOOK: Input: default@partitioned1@part=2
+#### A masked pattern was here ####
+part d
+1 NULL
+1 NULL
+1 NULL
+1 NULL
+1 hundred
+1 two hundred
+2 forty
+2 ten
+2 thirty
+2 twenty
+PREHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT
+-- smallint = (2-byte signed integer, from -32,768 to 32,767)
+--
+CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@partitioned4
+PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__9
+PREHOOK: Output: default@partitioned4@part=1
+POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__9
+POSTHOOK: Output: default@partitioned4@part=1
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@partitioned4
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ...
+alter table partitioned4 change column a a int
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Output: default@partitioned4
+PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+ (5000, 'new', 1),(90000, 'new', 1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__10
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2),
+ (5000, 'new', 1),(90000, 'new', 1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__10
+POSTHOOK: Output: default@partitioned4@part=1
+POSTHOOK: Output: default@partitioned4@part=2
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1 _col2
+PREHOOK: query: select part,a,b from partitioned4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@partitioned4
+PREHOOK: Input: default@partitioned4@part=1
+PREHOOK: Input: default@partitioned4@part=2
+#### A masked pattern was here ####
+POSTHOOK: query: select part,a,b from partitioned4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Input: default@partitioned4@part=1
+POSTHOOK: Input: default@partitioned4@part=2
+#### A masked pattern was here ####
+part a b
+1 1000 original
+1 3 original
+1 4 original
+1 5000 new
+1 6737 original
+1 90000 new
+2 200 new
+2 32768 new
+2 40000 new
+2 72909 new
+PREHOOK: query: DROP TABLE partitioned1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned1
+PREHOOK: Output: default@partitioned1
+POSTHOOK: query: DROP TABLE partitioned1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned1
+POSTHOOK: Output: default@partitioned1
+PREHOOK: query: DROP TABLE partitioned2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned2
+PREHOOK: Output: default@partitioned2
+POSTHOOK: query: DROP TABLE partitioned2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned2
+POSTHOOK: Output: default@partitioned2
+PREHOOK: query: DROP TABLE partitioned3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned3
+PREHOOK: Output: default@partitioned3
+POSTHOOK: query: DROP TABLE partitioned3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned3
+POSTHOOK: Output: default@partitioned3
+PREHOOK: query: DROP TABLE partitioned4
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@partitioned4
+PREHOOK: Output: default@partitioned4
+POSTHOOK: query: DROP TABLE partitioned4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@partitioned4
+POSTHOOK: Output: default@partitioned4
diff --git ql/src/test/results/clientpositive/tez/schema_evol_orc_vec_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_orc_vec_mapwork_table.q.out
new file mode 100644
index 0000000..aef9a74
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/schema_evol_orc_vec_mapwork_table.q.out
@@ -0,0 +1,298 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Vectorized, MapWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+--
+-- FILE VARIATION: ORC, Vectorized, MapWork, Table
+--
+--
+-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT
+---
+CREATE TABLE table1(a INT, b STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table1
+PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+_col0 _col1
+PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@table1
+PREHOOK: Output: default@table1
+POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@table1
+POSTHOOK: Output: default@table1
+PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__3
+PREHOOK: Output: default@table1
+POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__3
+POSTHOOK: Output: default@table1
+POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ]
+_col0 _col1 _col2 _col3
+PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right
+select a,b from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b
+1 new
+1 original
+2 new
+2 original
+3 new
+3 original
+4 new
+4 original
+5 new
+6 new
+PREHOOK: query: select a,b,c from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b c
+1 new 10
+1 original NULL
+2 new 20
+2 original NULL
+3 new 30
+3 original NULL
+4 new 40
+4 original NULL
+5 new 100
+6 new 200
+PREHOOK: query: select a,b,c,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b,c,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a b c d
+1 new 10 ten
+1 original NULL NULL
+2 new 20 twenty
+2 original NULL NULL
+3 new 30 thirty
+3 original NULL NULL
+4 new 40 forty
+4 original NULL NULL
+5 new 100 hundred
+6 new 200 two hundred
+PREHOOK: query: select a,c,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,c,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a c d
+1 10 ten
+1 NULL NULL
+2 20 twenty
+2 NULL NULL
+3 30 thirty
+3 NULL NULL
+4 40 forty
+4 NULL NULL
+5 100 hundred
+6 200 two hundred
+PREHOOK: query: select a,d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select a,d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+a d
+1 NULL
+1 ten
+2 NULL
+2 twenty
+3 NULL
+3 thirty
+4 NULL
+4 forty
+5 hundred
+6 two hundred
+PREHOOK: query: select c from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select c from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+c
+10
+100
+20
+200
+30
+40
+NULL
+NULL
+NULL
+NULL
+PREHOOK: query: select d from table1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table1
+#### A masked pattern was here ####
+POSTHOOK: query: select d from table1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table1
+#### A masked pattern was here ####
+d
+NULL
+NULL
+NULL
+NULL
+forty
+hundred
+ten
+thirty
+twenty
+two hundred
+PREHOOK: query: --
+-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: 
Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_fetchwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_fetchwork_table.q.out new file mode 100644 index 0000000..f849004 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_fetchwork_table.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ...
+alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 
thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_mapwork_table.q.out new file mode 100644 index 0000000..f849004 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_mapwork_table.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
STATIC INSERT +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 
+PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 new +1 original +2 new +2 original +3 new +3 original +4 new +4 original +5 new +6 new +PREHOOK: query: select a,b,c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c +1 new 10 +1 original NULL +2 new 20 +2 original NULL +3 new 30 +3 original NULL +4 new 40 +4 original NULL +5 new 100 +6 new 200 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +5 new 100 hundred +6 new 200 two hundred +PREHOOK: query: select a,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a c d +1 10 ten +1 NULL NULL +2 20 twenty +2 NULL NULL +3 30 thirty +3 NULL NULL +4 40 forty +4 NULL NULL +5 100 hundred +6 200 two hundred +PREHOOK: query: select a,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a d +1 NULL +1 ten +2 NULL +2 twenty +3 NULL +3 thirty +4 NULL +4 forty +5 hundred +6 two hundred +PREHOOK: query: select c from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select c from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +c +10 +100 +20 +200 +30 +40 +NULL +NULL +NULL +NULL +PREHOOK: query: select d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +d +NULL +NULL +NULL +NULL +forty +hundred +ten +thirty +twenty +two hundred +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... 
STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table2(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table2 +PREHOOK: Output: default@table2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table table2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 +PREHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table2 +POSTHOOK: query: insert into table table2 values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table2 +POSTHOOK: Lineage: table2.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table2.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table2 +PREHOOK: type: QUERY +PREHOOK: Input: default@table2 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table2 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table2 +PREHOOK: Output: 
default@table2 +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table2 +POSTHOOK: Output: default@table2 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out new file mode 100644 index 0000000..44ce24e --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_part.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: 
default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty 
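The permutation SELECTs above all come down to one rule: ADD COLUMNS without CASCADE updates only the table-level schema, so each partition keeps the column list it was created with. Partition part=1 still lists only (a, b), which is why every part=1 row, including the rows inserted after the ALTER, reads back with NULL for c and d, while part=2 was created under the widened schema and returns real values. A minimal standalone HiveQL sketch of that behavior follows; the table name demo_part is hypothetical and is not part of this patch, it only illustrates the scenario the golden output records:

CREATE TABLE demo_part(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE;
-- part=1 is created while the table schema is still (a, b)
INSERT INTO TABLE demo_part PARTITION(part=1) VALUES (1, 'original');
-- non-cascade: only the table-level schema gains c and d
ALTER TABLE demo_part ADD COLUMNS(c INT, d STRING);
-- part=1 keeps its two-column schema, so c and d read back as NULL here
INSERT INTO TABLE demo_part PARTITION(part=1) VALUES (2, 'new', 20, 'twenty');
-- part=2 is created under the widened schema and keeps all four columns
INSERT INTO TABLE demo_part PARTITION(part=2) VALUES (3, 'new', 30, 'thirty');
-- part=1 rows surface NULL for c and d; part=2 rows carry real values
SELECT part, a, b, c, d FROM demo_part;

The CHANGE COLUMN sections that follow exercise the same partition-level rule, which is why partitioned2 below reports the row 1 NULL new: the 90000 routed into the still-smallint part=1 is out of range and reads back as NULL, while part=2, created after a became int, holds 72909, 32768 and 40000 intact. A comparable sketch, again with a hypothetical table name:

CREATE TABLE demo_widen(a SMALLINT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE;
INSERT INTO TABLE demo_widen PARTITION(part=1) VALUES (1000, 'original');
-- widen a at the table level only; partition part=1 keeps smallint
ALTER TABLE demo_widen CHANGE COLUMN a a INT;
-- 90000 exceeds the smallint range of part=1 and reads back as NULL
INSERT INTO TABLE demo_widen PARTITION(part=1) VALUES (90000, 'new');
-- part=2 is created under the int schema, so the full int range survives
INSERT INTO TABLE demo_widen PARTITION(part=2) VALUES (72909, 'new');
SELECT part, a, b FROM demo_widen;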
+PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out new file mode 100644 index 0000000..4003c20 --- /dev/null +++ 
ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_fetchwork_table.q.out @@ -0,0 +1,297 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, FetchWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 original +2 original +3 original +4 original +PREHOOK: query: -- ADD COLUMNS +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- ADD COLUMNS +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 
original NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +PREHOOK: query: -- ADD COLUMNS +alter table table1 add columns(e string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- ADD COLUMNS +alter table table1 add columns(e string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: select a,b,c,d,e from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d e +1 new 10 ten NULL +1 original NULL NULL NULL +2 new 20 twenty NULL +2 original NULL NULL NULL +3 new 30 thirty NULL +3 original NULL NULL NULL +4 new 40 forty NULL +4 original NULL NULL NULL +5 new 100 hundred another1 +6 new 200 two hundred another2 +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table3 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +3 original +4 original +6737 original +PREHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +6737 original +72909 new +PREHOOK: query: -- ADD COLUMNS ... RESTRICT +alter table table3 add columns(e string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- ADD COLUMNS ... RESTRICT +alter table table3 add columns(e string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: -- CHANGE COLUMN ... 
RESTRICT +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE table3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: DROP TABLE table3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out new file mode 100644 index 0000000..44f5822 --- /dev/null +++ ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_part.q.out @@ -0,0 +1,642 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Partitioned +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... STATIC INSERT +--- +CREATE TABLE partitioned1(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... 
+alter table partitioned1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@partitioned1@part=2 +POSTHOOK: query: insert into table partitioned1 partition(part=2) values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@partitioned1@part=2 +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=2).d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@partitioned1@part=1 +POSTHOOK: query: insert into table partitioned1 partition(part=1) values(5, 'new', 100, 'hundred'),(6, 'new', 200, 'two hundred') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@partitioned1@part=1 +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned1 PARTITION(part=1).d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 
6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: 
Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... STATIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned2(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned2 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned2 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@partitioned2@part=2 +POSTHOOK: query: insert into table partitioned2 partition(part=2) values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@partitioned2@part=2 +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=2).b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@partitioned2@part=1 +POSTHOOK: query: insert into table partitioned2 partition(part=1) values(5000, 'new'),(90000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@partitioned2@part=1 +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned2 PARTITION(part=1).b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select part,a,b from partitioned2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned2 +PREHOOK: Input: default@partitioned2@part=1 +PREHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Input: default@partitioned2@part=1 +POSTHOOK: Input: default@partitioned2@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS ... 
DYNAMIC INSERT +--- +CREATE TABLE partitioned3(a INT, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__7 +PREHOOK: Output: default@partitioned3@part=1 +POSTHOOK: query: insert into table partitioned3 partition(part=1) values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__7 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: -- Table-Non-Cascade ADD COLUMNS ... +alter table partitioned3 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__8 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: insert into table partitioned3 partition(part) values(1, 'new', 10, 'ten', 2),(2, 'new', 20, 'twenty', 2), (3, 'new', 30, 'thirty', 2),(4, 'new', 40, 'forty', 2), + (5, 'new', 100, 'hundred', 1),(6, 'new', 200, 'two hundred', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__8 +POSTHOOK: Output: default@partitioned3@part=1 +POSTHOOK: Output: default@partitioned3@part=2 +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=1).d SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).a EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).b SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).c EXPRESSION [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: partitioned3 PARTITION(part=2).d SIMPLE 
[(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: -- SELECT permutation columns to make sure NULL defaulting works right +select part,a,b from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b +1 1 original +1 2 original +1 3 original +1 4 original +1 5 new +1 6 new +2 1 new +2 2 new +2 3 new +2 4 new +PREHOOK: query: select part,a,b,c from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c +1 1 original NULL +1 2 original NULL +1 3 original NULL +1 4 original NULL +1 5 new NULL +1 6 new NULL +2 1 new 10 +2 2 new 20 +2 3 new 30 +2 4 new 40 +PREHOOK: query: select part,a,b,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a b c d +1 1 original NULL NULL +1 2 original NULL NULL +1 3 original NULL NULL +1 4 original NULL NULL +1 5 new NULL NULL +1 6 new NULL NULL +2 1 new 10 ten +2 2 new 20 twenty +2 3 new 30 thirty +2 4 new 40 forty +PREHOOK: query: select part,a,c,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,c,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a c d +1 1 NULL NULL +1 2 NULL NULL +1 3 NULL NULL +1 4 NULL NULL +1 5 NULL NULL +1 6 NULL NULL +2 1 10 ten +2 2 20 twenty +2 3 30 thirty +2 4 40 forty +PREHOOK: query: select part,a,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part a d +1 1 NULL +1 2 NULL +1 3 NULL +1 4 NULL +1 5 NULL +1 6 NULL +2 1 ten +2 2 twenty +2 3 thirty +2 4 forty +PREHOOK: query: select part,c from partitioned1 +PREHOOK: type: QUERY 
+PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,c from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part c +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 10 +2 20 +2 30 +2 40 +PREHOOK: query: select part,d from partitioned1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned1 +PREHOOK: Input: default@partitioned1@part=1 +PREHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,d from partitioned1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Input: default@partitioned1@part=1 +POSTHOOK: Input: default@partitioned1@part=2 +#### A masked pattern was here #### +part d +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +1 NULL +2 forty +2 ten +2 thirty +2 twenty +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN ... DYNAMIC INSERT +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE partitioned4(a smallint, b STRING) PARTITIONED BY(part INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__9 +PREHOOK: Output: default@partitioned4@part=1 +POSTHOOK: query: insert into table partitioned4 partition(part=1) values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__9 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__9)values__tmp__table__9.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... +alter table partitioned4 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: -- Table-Non-Cascade CHANGE COLUMNS ... 
+alter table partitioned4 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 +PREHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__10 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: insert into table partitioned4 partition(part) values(72909, 'new', 2),(200, 'new', 2), (32768, 'new', 2),(40000, 'new', 2), + (5000, 'new', 1),(90000, 'new', 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__10 +POSTHOOK: Output: default@partitioned4@part=1 +POSTHOOK: Output: default@partitioned4@part=2 +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=1).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).a EXPRESSION [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: partitioned4 PARTITION(part=2).b SIMPLE [(values__tmp__table__10)values__tmp__table__10.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select part,a,b from partitioned4 +PREHOOK: type: QUERY +PREHOOK: Input: default@partitioned4 +PREHOOK: Input: default@partitioned4@part=1 +PREHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +POSTHOOK: query: select part,a,b from partitioned4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Input: default@partitioned4@part=1 +POSTHOOK: Input: default@partitioned4@part=2 +#### A masked pattern was here #### +part a b +1 1000 original +1 3 original +1 4 original +1 5000 new +1 6737 original +1 NULL new +2 200 new +2 32768 new +2 40000 new +2 72909 new +PREHOOK: query: DROP TABLE partitioned1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned1 +PREHOOK: Output: default@partitioned1 +POSTHOOK: query: DROP TABLE partitioned1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned1 +POSTHOOK: Output: default@partitioned1 +PREHOOK: query: DROP TABLE partitioned2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned2 +PREHOOK: Output: default@partitioned2 +POSTHOOK: query: DROP TABLE partitioned2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned2 +POSTHOOK: Output: default@partitioned2 +PREHOOK: query: DROP TABLE partitioned3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned3 +PREHOOK: Output: default@partitioned3 +POSTHOOK: query: DROP TABLE partitioned3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned3 +POSTHOOK: Output: default@partitioned3 +PREHOOK: query: DROP TABLE partitioned4 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partitioned4 +PREHOOK: Output: default@partitioned4 +POSTHOOK: query: DROP TABLE partitioned4 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partitioned4 +POSTHOOK: Output: default@partitioned4 diff --git ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out new file mode 100644 index 0000000..4003c20 --- /dev/null +++ 
ql/src/test/results/clientpositive/tez/schema_evol_text_nonvec_mapwork_table.q.out @@ -0,0 +1,297 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table1 +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- +-- FILE VARIATION: TEXT, Non-Vectorized, MapWork, Table +-- +-- +-- SECTION VARIATION: ALTER TABLE ADD COLUMNS +--- +CREATE TABLE table1(a INT, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'original'),(2, 'original'), (3, 'original'),(4, 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b +1 original +2 original +3 original +4 original +PREHOOK: query: -- ADD COLUMNS +alter table table1 add columns(c int, d string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- ADD COLUMNS +alter table table1 add columns(c int, d string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(1, 'new', 10, 'ten'),(2, 'new', 20, 'twenty'), (3, 'new', 30, 'thirty'),(4, 'new', 40, 'forty') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +_col0 _col1 _col2 _col3 +PREHOOK: query: select a,b,c,d from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d +1 new 10 ten +1 original 
NULL NULL +2 new 20 twenty +2 original NULL NULL +3 new 30 thirty +3 original NULL NULL +4 new 40 forty +4 original NULL NULL +PREHOOK: query: -- ADD COLUMNS +alter table table1 add columns(e string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: -- ADD COLUMNS +alter table table1 add columns(e string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@table1 +POSTHOOK: query: insert into table table1 values(5, 'new', 100, 'hundred', 'another1'),(6, 'new', 200, 'two hundred', 'another2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@table1 +POSTHOOK: Lineage: table1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table1.b SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table1.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: table1.d SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: table1.e SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +_col0 _col1 _col2 _col3 _col4 +PREHOOK: query: select a,b,c,d,e from table1 +PREHOOK: type: QUERY +PREHOOK: Input: default@table1 +#### A masked pattern was here #### +POSTHOOK: query: select a,b,c,d,e from table1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table1 +#### A masked pattern was here #### +a b c d e +1 new 10 ten NULL +1 original NULL NULL NULL +2 new 20 twenty NULL +2 original NULL NULL NULL +3 new 30 thirty NULL +3 original NULL NULL NULL +4 new 40 forty NULL +4 original NULL NULL NULL +5 new 100 hundred another1 +6 new 200 two hundred another2 +PREHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table3 +POSTHOOK: query: -- +-- SECTION VARIATION: ALTER TABLE CHANGE COLUMN +-- smallint = (2-byte signed integer, from -32,768 to 32,767) +-- +CREATE TABLE table3(a smallint, b STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__4 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(1000, 'original'),(6737, 'original'), ('3', 'original'),('4', 'original') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__4 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +3 original +4 original +6737 original +PREHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__5 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(72909, 'new'),(200, 'new'), (32768, 'new'),(40000, 'new') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__5 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +_col0 _col1 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +6737 original +72909 new +PREHOOK: query: -- ADD COLUMNS ... RESTRICT +alter table table3 add columns(e string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- ADD COLUMNS ... RESTRICT +alter table table3 add columns(e string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__6 +PREHOOK: Output: default@table3 +POSTHOOK: query: insert into table table3 values(5000, 'new', 'another5'),(90000, 'new', 'another6') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__6 +POSTHOOK: Output: default@table3 +POSTHOOK: Lineage: table3.a EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: table3.b SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: table3.e SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +_col0 _col1 _col2 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: -- CHANGE COLUMN ... 
RESTRICT +alter table table3 change column a a int +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: -- CHANGE COLUMN ... RESTRICT +alter table table3 change column a a int +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 +PREHOOK: query: select a,b from table3 +PREHOOK: type: QUERY +PREHOOK: Input: default@table3 +#### A masked pattern was here #### +POSTHOOK: query: select a,b from table3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table3 +#### A masked pattern was here #### +a b +1000 original +200 new +3 original +32768 new +4 original +40000 new +5000 new +6737 original +72909 new +90000 new +PREHOOK: query: DROP TABLE table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table1 +PREHOOK: Output: default@table1 +POSTHOOK: query: DROP TABLE table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table1 +POSTHOOK: Output: default@table1 +PREHOOK: query: DROP TABLE table2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE table2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE table3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@table3 +PREHOOK: Output: default@table3 +POSTHOOK: query: DROP TABLE table3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@table3 +POSTHOOK: Output: default@table3 diff --git ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out index 65b2ff1..04f8a1f 100644 --- ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols.q.out @@ -237,6 +237,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reducer 2 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out index 0437ff6..99f42e9 100644 --- ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/vector_partition_diff_num_cols.q.out @@ -225,6 +225,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java index 2b6d9c0..7c77dd8 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.Text; @@ -62,7 +63,7 @@ public class BinarySortableDeserializeRead implements DeserializeRead { public static final Log LOG = 
LogFactory.getLog(BinarySortableDeserializeRead.class.getName()); - private PrimitiveTypeInfo[] primitiveTypeInfos; + private TypeInfo[] typeInfos; // The sort order (ascending/descending) for each field. Set to true when descending (invert). private boolean[] columnSortOrderIsDesc; @@ -94,14 +95,14 @@ public BinarySortableDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos) { this(primitiveTypeInfos, null); } - public BinarySortableDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos, + public BinarySortableDeserializeRead(TypeInfo[] typeInfos, boolean[] columnSortOrderIsDesc) { - this.primitiveTypeInfos = primitiveTypeInfos; - fieldCount = primitiveTypeInfos.length; + this.typeInfos = typeInfos; + fieldCount = typeInfos.length; if (columnSortOrderIsDesc != null) { this.columnSortOrderIsDesc = columnSortOrderIsDesc; } else { - this.columnSortOrderIsDesc = new boolean[primitiveTypeInfos.length]; + this.columnSortOrderIsDesc = new boolean[typeInfos.length]; Arrays.fill(this.columnSortOrderIsDesc, false); } inputByteBuffer = new InputByteBuffer(); @@ -117,8 +118,8 @@ private BinarySortableDeserializeRead() { /* * The primitive type information for all fields. */ - public PrimitiveTypeInfo[] primitiveTypeInfos() { - return primitiveTypeInfos; + public TypeInfo[] typeInfos() { + return typeInfos; } /* @@ -176,7 +177,7 @@ public boolean readCheckNull() throws IOException { // We have a field and are positioned to it. - if (primitiveTypeInfos[fieldIndex].getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { + if (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory() != PrimitiveCategory.DECIMAL) { return false; } @@ -375,7 +376,7 @@ public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOExcep (BinarySortableReadHiveCharResults) readHiveCharResults; if (!binarySortableReadHiveCharResults.isInit()) { - binarySortableReadHiveCharResults.init((CharTypeInfo) primitiveTypeInfos[fieldIndex]); + binarySortableReadHiveCharResults.init((CharTypeInfo) typeInfos[fieldIndex]); } HiveCharWritable hiveCharWritable = binarySortableReadHiveCharResults.getHiveCharWritable(); @@ -416,7 +417,7 @@ public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throw BinarySortableReadHiveVarcharResults binarySortableReadHiveVarcharResults = (BinarySortableReadHiveVarcharResults) readHiveVarcharResults; if (!binarySortableReadHiveVarcharResults.isInit()) { - binarySortableReadHiveVarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]); + binarySortableReadHiveVarcharResults.init((VarcharTypeInfo) typeInfos[fieldIndex]); } HiveVarcharWritable hiveVarcharWritable = binarySortableReadHiveVarcharResults.getHiveVarcharWritable(); @@ -733,7 +734,7 @@ private boolean earlyReadHiveDecimal() throws IOException { } tempHiveDecimalWritable.set(bd); - saveDecimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfos[fieldIndex]; + saveDecimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex]; int precision = saveDecimalTypeInfo.getPrecision(); int scale = saveDecimalTypeInfo.getScale(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java index 285ae10..3779f1a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableSerializeWrite.java @@ -32,13 +32,8 @@ import 
org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; -import org.apache.hadoop.hive.serde2.binarysortable.InputByteBuffer; import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; -import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalDayTime; -import org.apache.hadoop.hive.serde2.lazy.LazyHiveIntervalYearMonth; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; import org.apache.hive.common.util.DateUtils; diff --git serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java index b187aff..c2b0cfc 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java @@ -35,7 +35,7 @@ import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; /* @@ -55,9 +55,9 @@ public interface DeserializeRead { /* - * The primitive type information for all fields. + * The type information for all fields. */ - PrimitiveTypeInfo[] primitiveTypeInfos(); + TypeInfo[] typeInfos(); /* * Set the range of bytes to be deserialized. diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java index 8c5b0b3..d4220ac 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java @@ -30,32 +30,24 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; -import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalDayTimeResults; -import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalYearMonthResults; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyBinary; import org.apache.hadoop.hive.serde2.lazy.LazyByte; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; import org.apache.hadoop.hive.serde2.lazy.LazyLong; -import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyShort; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; -import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; -import 
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index 8c5b0b3..d4220ac 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -30,32 +30,24 @@
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
-import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalDayTimeResults;
-import org.apache.hadoop.hive.serde2.fast.DeserializeRead.ReadIntervalYearMonthResults;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
 import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyBinary;
 import org.apache.hadoop.hive.serde2.lazy.LazyByte;
 import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
 import org.apache.hadoop.hive.serde2.lazy.LazyLong;
-import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
 import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
 import org.apache.hadoop.hive.serde2.lazy.LazyShort;
 import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.Text;
 import org.apache.hive.common.util.TimestampParser;
@@ -69,21 +61,19 @@
  *
  * Reading some fields require a results object to receive value information. A separate
  * results object is created by the caller at initialization per different field even for the same
- * type. 
+ * type.
  *
  * Some type values are by reference to either bytes in the deserialization buffer or to
  * other type specific buffers. So, those references are only valid until the next time set is
  * called.
  */
-public class LazySimpleDeserializeRead implements DeserializeRead {
+public final class LazySimpleDeserializeRead implements DeserializeRead {
   public static final Log LOG = LogFactory.getLog(LazySimpleDeserializeRead.class.getName());

-  private PrimitiveTypeInfo[] primitiveTypeInfos;
+  private TypeInfo[] typeInfos;

-  private LazySerDeParameters lazyParams;
   private byte separator;
-  private boolean lastColumnTakesRest;
   private boolean isEscaped;
   private byte escapeChar;
   private byte[] nullSequenceBytes;
@@ -122,21 +112,19 @@
   private boolean readBeyondBufferRangeWarned;
   private boolean bufferRangeHasExtraDataWarned;

-  public LazySimpleDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos,
+  public LazySimpleDeserializeRead(TypeInfo[] typeInfos,
       byte separator, LazySerDeParameters lazyParams) {
-    this.primitiveTypeInfos = primitiveTypeInfos;
+    this.typeInfos = typeInfos;
     this.separator = separator;
-    this.lazyParams = lazyParams;

-    lastColumnTakesRest = lazyParams.isLastColumnTakesRest();
     isEscaped = lazyParams.isEscaped();
     escapeChar = lazyParams.getEscapeChar();
     nullSequenceBytes = lazyParams.getNullSequence().getBytes();
     isExtendedBooleanLiteral = lazyParams.isExtendedBooleanLiteral();

-    fieldCount = primitiveTypeInfos.length;
+    fieldCount = typeInfos.length;
     tempText = new Text();
     readBeyondConfiguredFieldsWarned = false;
     readBeyondBufferRangeWarned = false;
@@ -148,10 +136,11 @@ private LazySimpleDeserializeRead() {
   }

   /*
-   * The primitive type information for all fields.
+   * The type information for all fields.
    */
-  public PrimitiveTypeInfo[] primitiveTypeInfos() {
-    return primitiveTypeInfos;
+  @Override
+  public TypeInfo[] typeInfos() {
+    return typeInfos;
   }

   /*
@@ -189,7 +178,7 @@ public boolean readCheckNull() {
       if (!readBeyondBufferRangeWarned) {
         // Warn only once.
         int length = end - start;
-        LOG.info("Reading beyond buffer range! Buffer range " + start 
+        LOG.info("Reading beyond buffer range! Buffer range " + start
             + " for length " + length + " but reading more (NULLs returned)."
             + " Ignoring similar problems.");
         readBeyondBufferRangeWarned = true;
@@ -243,7 +232,7 @@ public boolean readCheckNull() {
       }
     }

-    switch (primitiveTypeInfos[fieldIndex].getPrimitiveCategory()) {
+    switch (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory()) {
     case BOOLEAN:
       {
         int i = fieldStart;
@@ -427,7 +416,7 @@ public boolean readCheckNull() {
         try {
           s = new String(bytes, fieldStart, fieldLength, "US-ASCII");
         } catch (UnsupportedEncodingException e) {
-          LOG.error(e);
+          LOG.error("Unsupported encoding found", e);
           s = "";
         }
@@ -466,7 +455,7 @@ public boolean readCheckNull() {
         // }
         break;
       case INTERVAL_DAY_TIME:
-        { 
+        {
           String s = null;
           try {
             s = Text.decode(bytes, fieldStart, fieldLength);
@@ -491,7 +480,7 @@ public boolean readCheckNull() {
         }

         saveDecimal = HiveDecimal.create(byteData);
-        saveDecimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfos[fieldIndex];
+        saveDecimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex];
         int precision = saveDecimalTypeInfo.getPrecision();
         int scale = saveDecimalTypeInfo.getScale();
         saveDecimal = HiveDecimalUtils.enforcePrecisionScale(saveDecimal, precision, scale);
@@ -507,7 +496,7 @@ public boolean readCheckNull() {
       break;

     default:
-      throw new Error("Unexpected primitive category " + primitiveTypeInfos[fieldIndex].getPrimitiveCategory());
+      throw new Error("Unexpected primitive category " + ((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory());
     }

     return false;
@@ -529,13 +518,14 @@ public void logExceptionMessage(byte[] bytes, int bytesStart, int bytesLength, S
   /*
    * Call this method after all fields have been read to check for extra fields.
    */
+  @Override
   public void extraFieldsCheck() {
     if (offset < end) {
       // We did not consume all of the byte range.
       if (!bufferRangeHasExtraDataWarned) {
         // Warn only once.
         int length = end - start;
-        LOG.info("Not all fields were read in the buffer range! Buffer range " + start 
+        LOG.info("Not all fields were read in the buffer range! Buffer range " + start
             + " for length " + length + " but reading more (NULLs returned)."
             + " Ignoring similar problems.");
         bufferRangeHasExtraDataWarned = true;
@@ -630,7 +620,7 @@ public LazySimpleReadStringResults() {
   }

   // Reading a STRING field require a results object to receive value information. A separate
-  // results object is created by the caller at initialization per different bytes field. 
+  // results object is created by the caller at initialization per different bytes field.
   @Override
   public ReadStringResults createReadStringResults() {
     return new LazySimpleReadStringResults();
@@ -663,17 +653,18 @@ public HiveCharWritable getHiveCharWritable() {
   }

   // Reading a CHAR field require a results object to receive value information. A separate
-  // results object is created by the caller at initialization per different CHAR field. 
+  // results object is created by the caller at initialization per different CHAR field.
   @Override
   public ReadHiveCharResults createReadHiveCharResults() {
     return new LazySimpleReadHiveCharResults();
   }

+  @Override
   public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOException {
     LazySimpleReadHiveCharResults LazySimpleReadHiveCharResults =
         (LazySimpleReadHiveCharResults) readHiveCharResults;

     if (!LazySimpleReadHiveCharResults.isInit()) {
-      LazySimpleReadHiveCharResults.init((CharTypeInfo) primitiveTypeInfos[fieldIndex]);
+      LazySimpleReadHiveCharResults.init((CharTypeInfo) typeInfos[fieldIndex]);
     }

     if (LazySimpleReadHiveCharResults.readStringResults == null) {
@@ -714,17 +705,18 @@ public HiveVarcharWritable getHiveVarcharWritable() {
   }

   // Reading a VARCHAR field require a results object to receive value information. A separate
-  // results object is created by the caller at initialization per different VARCHAR field. 
+  // results object is created by the caller at initialization per different VARCHAR field.
   @Override
   public ReadHiveVarcharResults createReadHiveVarcharResults() {
     return new LazySimpleReadHiveVarcharResults();
   }

+  @Override
   public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throws IOException {
     LazySimpleReadHiveVarcharResults lazySimpleReadHiveVarvarcharResults =
         (LazySimpleReadHiveVarcharResults) readHiveVarcharResults;

     if (!lazySimpleReadHiveVarvarcharResults.isInit()) {
-      lazySimpleReadHiveVarvarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]);
+      lazySimpleReadHiveVarvarcharResults.init((VarcharTypeInfo) typeInfos[fieldIndex]);
     }

     if (lazySimpleReadHiveVarvarcharResults.readStringResults == null) {
@@ -757,7 +749,7 @@ public LazySimpleReadBinaryResults() {
   }

   // Reading a BINARY field require a results object to receive value information. A separate
-  // results object is created by the caller at initialization per different bytes field. 
+  // results object is created by the caller at initialization per different bytes field.
   @Override
   public ReadBinaryResults createReadBinaryResults() {
     return new LazySimpleReadBinaryResults();
@@ -787,7 +779,7 @@ public DateWritable getDateWritable() {
   }

   // Reading a DATE field require a results object to receive value information. A separate
-  // results object is created by the caller at initialization per different DATE field. 
+  // results object is created by the caller at initialization per different DATE field.
   @Override
   public ReadDateResults createReadDateResults() {
     return new LazySimpleReadDateResults();
@@ -821,7 +813,7 @@ public HiveIntervalYearMonthWritable getHiveIntervalYearMonthWritable() {

   // Reading a INTERVAL_YEAR_MONTH field require a results object to receive value information.
   // A separate results object is created by the caller at initialization per different
-  // INTERVAL_YEAR_MONTH field. 
+  // INTERVAL_YEAR_MONTH field.
   @Override
   public ReadIntervalYearMonthResults createReadIntervalYearMonthResults() {
     return new LazySimpleReadIntervalYearMonthResults();
@@ -833,7 +825,7 @@ public void readIntervalYearMonth(ReadIntervalYearMonthResults readIntervalYearM
     LazySimpleReadIntervalYearMonthResults lazySimpleReadIntervalYearMonthResults =
         (LazySimpleReadIntervalYearMonthResults) readIntervalYearMonthResults;

-    HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable = 
+    HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable =
         lazySimpleReadIntervalYearMonthResults.getHiveIntervalYearMonthWritable();
     hiveIntervalYearMonthWritable.set(saveIntervalYearMonth);
     saveIntervalYearMonth = null;
@@ -857,7 +849,7 @@ public HiveIntervalDayTimeWritable getHiveIntervalDayTimeWritable() {

   // Reading a INTERVAL_DAY_TIME field require a results object to receive value information.
   // A separate results object is created by the caller at initialization per different
-  // INTERVAL_DAY_TIME field. 
+  // INTERVAL_DAY_TIME field.
   @Override
   public ReadIntervalDayTimeResults createReadIntervalDayTimeResults() {
     return new LazySimpleReadIntervalDayTimeResults();
@@ -869,7 +861,7 @@ public void readIntervalDayTime(ReadIntervalDayTimeResults readIntervalDayTimeRe
     LazySimpleReadIntervalDayTimeResults lazySimpleReadIntervalDayTimeResults =
         (LazySimpleReadIntervalDayTimeResults) readIntervalDayTimeResults;

-    HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable = 
+    HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable =
         lazySimpleReadIntervalDayTimeResults.getHiveIntervalDayTimeWritable();
     hiveIntervalDayTimeWritable.set(saveIntervalDayTime);
     saveIntervalDayTime = null;
@@ -892,7 +884,7 @@ public TimestampWritable getTimestampWritable() {
   }

   // Reading a TIMESTAMP field require a results object to receive value information. A separate
-  // results object is created by the caller at initialization per different TIMESTAMP field. 
+  // results object is created by the caller at initialization per different TIMESTAMP field.
   @Override
   public ReadTimestampResults createReadTimestampResults() {
     return new LazySimpleReadTimestampResults();
@@ -900,7 +892,7 @@ public ReadTimestampResults createReadTimestampResults() {
   }

   @Override
   public void readTimestamp(ReadTimestampResults readTimestampResults) {
-    LazySimpleReadTimestampResults lazySimpleReadTimestampResults = 
+    LazySimpleReadTimestampResults lazySimpleReadTimestampResults =
         (LazySimpleReadTimestampResults) readTimestampResults;

     TimestampWritable timestampWritable = lazySimpleReadTimestampResults.getTimestampWritable();
@@ -928,7 +920,7 @@ public HiveDecimal getHiveDecimal() {
   }

   // Reading a DECIMAL field require a results object to receive value information. A separate
-  // results object is created by the caller at initialization per different DECIMAL field. 
+  // results object is created by the caller at initialization per different DECIMAL field.
   @Override
   public ReadDecimalResults createReadDecimalResults() {
     return new LazySimpleReadDecimalResults();
@@ -952,101 +944,6 @@ public void readHiveDecimal(ReadDecimalResults readDecimalResults) {
   private static int maxLongDigitsCount = maxLongBytes.length;
   private static byte[] minLongNoSignBytes = ((Long) Long.MIN_VALUE).toString().substring(1).getBytes();

-  private boolean parseLongFast() {
-
-    // Parse without using exceptions for better performance.
-    int i = fieldStart;
-    int end = fieldStart + fieldLength;
-    boolean negative = false;
-    if (i >= end) {
-      return false;   // Empty field.
-    }
-    if (bytes[i] == '+') {
-      i++;
-      if (i >= end) {
-        return false;
-      }
-    } else if (bytes[i] == '-') {
-      negative = true;
-      i++;
-      if (i >= end) {
-        return false;
-      }
-    }
-    // Skip leading zeros.
-    boolean atLeastOneZero = false;
-    while (true) {
-      if (bytes[i] != '0') {
-        break;
-      }
-      i++;
-      if (i >= end) {
-        saveLong = 0;
-        return true;
-      }
-      atLeastOneZero = true;
-    }
-    // We tolerate and ignore decimal places.
-    if (bytes[i] == '.') {
-      if (!atLeastOneZero) {
-        return false;
-      }
-      saveLong = 0;
-      // Fall through below and verify trailing decimal digits.
-    } else {
-      if (!Character.isDigit(bytes[i])) {
-        return false;
-      }
-      int nonLeadingZeroStart = i;
-      int digitCount = 1;
-      saveLong = Character.digit(bytes[i], 10);
-      i++;
-      while (i < end) {
-        if (!Character.isDigit(bytes[i])) {
-          break;
-        }
-        digitCount++;
-        if (digitCount > maxLongDigitsCount) {
-          return false;
-        } else if (digitCount == maxLongDigitsCount) {
-          // Use the old trick of comparing against number string to check for overflow.
-          if (!negative) {
-            if (byteArrayCompareRanges(bytes, nonLeadingZeroStart, maxLongBytes, 0, digitCount) >= 1) {
-              return false;
-            }
-          } else {
-            if (byteArrayCompareRanges(bytes, nonLeadingZeroStart, minLongNoSignBytes, 0, digitCount) >= 1) {
-              return false;
-            }
-          }
-        }
-        saveLong = (saveLong * 10) + Character.digit(bytes[i], 10);
-      }
-      if (negative) {
-        // Safe because of our number string comparision against min (negative) long.
-        saveLong = -saveLong;
-      }
-      if (i >= end) {
-        return true;
-      }
-      if (bytes[i] != '.') {
-        return false;
-      }
-    }
-    // Fall through to here if we detect the start of trailing decimal digits...
-    // We verify trailing digits only.
-    while (true) {
-      i++;
-      if (i >= end) {
-        break;
-      }
-      if (!Character.isDigit(bytes[i])) {
-        return false;
-      }
-    }
-    return true;
-  }
-
   public static int byteArrayCompareRanges(byte[] arg1, int start1, byte[] arg2, int start2, int len) {
     for (int i = 0; i < len; i++) {
       // Note the "& 0xff" is just a way to convert unsigned bytes to signed integer.
@@ -1059,4 +956,4 @@ public static int byteArrayCompareRanges(byte[] arg1, int start1, byte[] arg2, i
     return 0;
   }

-}
\ No newline at end of file
+}

diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
index 77838a1..46f37eb 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
@@ -20,7 +20,6 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
 import java.sql.Date;
 import java.sql.Timestamp;

@@ -34,7 +33,6 @@
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
 import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
@@ -47,13 +45,6 @@
 import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
 import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
 import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
-import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
 import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
 import org.apache.hadoop.io.Text;
 import org.apache.hive.common.util.DateUtils;
@@ -516,4 +507,4 @@ public void writeHiveDecimal(HiveDecimal v) throws IOException {

     index++;
   }
-}
\ No newline at end of file
+}
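For orientation, a usage sketch for the reworked LazySimpleDeserializeRead constructor above. Everything here is illustrative and not taken from the patch: the table properties, separator, and sample row are invented, the three-argument LazySerDeParameters constructor is the one in org.apache.hadoop.hive.serde2.lazy, and set(bytes, offset, length) is assumed from the interface's "set the range of bytes to be deserialized" contract.

    import java.util.Properties;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
    import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class LazySimpleReadSketch {
      public static void main(String[] args) throws Exception {
        // Hypothetical two-column table definition.
        Properties tbl = new Properties();
        tbl.setProperty("columns", "id,name");
        tbl.setProperty("columns.types", "int:string");
        LazySerDeParameters lazyParams = new LazySerDeParameters(
            new Configuration(), tbl, LazySimpleReadSketch.class.getName());

        TypeInfo[] typeInfos = TypeInfoUtils
            .getTypeInfosFromTypeString("int:string").toArray(new TypeInfo[0]);
        LazySimpleDeserializeRead reader =
            new LazySimpleDeserializeRead(typeInfos, (byte) ',', lazyParams);

        byte[] row = "17,hello".getBytes("UTF-8");
        reader.set(row, 0, row.length);  // assumed positioning call
        // Per column: readCheckNull() answers NULL vs. NOT NULL and parses the
        // field; extraFieldsCheck() afterwards warns once about trailing bytes.
      }
    }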
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
index 3d14fbe..2751adc 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
@@ -311,6 +311,35 @@ public BooleanRef(boolean v) {
     public boolean value;
   }

+  private static void writeDateToByteStream(RandomAccessOutput byteStream,
+      DateWritable date) {
+    LazyBinaryUtils.writeVInt(byteStream, date.getDays());
+  }
+
+  public static void setFromBytes(byte[] bytes, int offset, int length,
+      HiveDecimalWritable dec) {
+    LazyBinaryUtils.VInt vInt = new LazyBinaryUtils.VInt();
+    LazyBinaryUtils.readVInt(bytes, offset, vInt);
+    int scale = vInt.value;
+    offset += vInt.length;
+    LazyBinaryUtils.readVInt(bytes, offset, vInt);
+    offset += vInt.length;
+    byte[] internalStorage = dec.getInternalStorage();
+    if (internalStorage.length != vInt.value) {
+      internalStorage = new byte[vInt.value];
+    }
+    System.arraycopy(bytes, offset, internalStorage, 0, vInt.value);
+    dec.set(internalStorage, scale);
+  }
+
+  public static void writeToByteStream(RandomAccessOutput byteStream,
+      HiveDecimalWritable dec) {
+    LazyBinaryUtils.writeVInt(byteStream, dec.getScale());
+    byte[] internalStorage = dec.getInternalStorage();
+    LazyBinaryUtils.writeVInt(byteStream, internalStorage.length);
+    byteStream.write(internalStorage, 0, internalStorage.length);
+  }
+
   /**
    * A recursive function that serialize an object to a byte buffer based on its
    * object inspector.
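A round-trip sketch for the decimal layout the two public helpers above define: a VInt scale, a VInt byte count, then the unscaled bytes. Not part of the patch; it assumes ByteStream.Output as the RandomAccessOutput implementation, and the decimal value is arbitrary.

    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.serde2.ByteStream.Output;
    import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
    import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;

    public class DecimalByteStreamSketch {
      public static void main(String[] args) {
        HiveDecimalWritable in =
            new HiveDecimalWritable(HiveDecimal.create("123.45"));
        Output byteStream = new Output();
        // Writes VInt scale, VInt byte length, then the unscaled bytes.
        LazyBinarySerDe.writeToByteStream(byteStream, in);

        HiveDecimalWritable out = new HiveDecimalWritable();
        LazyBinarySerDe.setFromBytes(byteStream.getData(), 0,
            byteStream.getLength(), out);
        System.out.println(out.getHiveDecimal());  // prints 123.45
      }
    }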
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index a18e8b8..7406697 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -32,6 +32,7 @@
 import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong;
@@ -39,6 +40,7 @@
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;

 /*
@@ -58,7 +60,7 @@ public class LazyBinaryDeserializeRead implements DeserializeRead {
   public static final Log LOG = LogFactory.getLog(LazyBinaryDeserializeRead.class.getName());

-  private PrimitiveTypeInfo[] primitiveTypeInfos;
+  private TypeInfo[] typeInfos;

   private byte[] bytes;
   private int start;
@@ -80,9 +82,9 @@
   private boolean readBeyondBufferRangeWarned;
   private boolean bufferRangeHasExtraDataWarned;

-  public LazyBinaryDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos) {
-    this.primitiveTypeInfos = primitiveTypeInfos;
-    fieldCount = primitiveTypeInfos.length;
+  public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) {
+    this.typeInfos = typeInfos;
+    fieldCount = typeInfos.length;
     tempVInt = new VInt();
     tempVLong = new VLong();
     readBeyondConfiguredFieldsWarned = false;
@@ -95,10 +97,10 @@ private LazyBinaryDeserializeRead() {
   }

   /*
-   * The primitive type information for all fields.
+   * The type information for all fields.
    */
-  public PrimitiveTypeInfo[] primitiveTypeInfos() {
-    return primitiveTypeInfos;
+  public TypeInfo[] typeInfos() {
+    return typeInfos;
   }

   /*
@@ -153,7 +155,7 @@ public boolean readCheckNull() throws IOException {

     // We have a field and are positioned to it.

-    if (primitiveTypeInfos[fieldIndex].getPrimitiveCategory() != PrimitiveCategory.DECIMAL) {
+    if (((PrimitiveTypeInfo) typeInfos[fieldIndex]).getPrimitiveCategory() != PrimitiveCategory.DECIMAL) {
       return false;
     }

@@ -508,7 +510,7 @@ public void readHiveChar(ReadHiveCharResults readHiveCharResults) throws IOExcep
     LazyBinaryReadHiveCharResults lazyBinaryReadHiveCharResults =
         (LazyBinaryReadHiveCharResults) readHiveCharResults;

     if (!lazyBinaryReadHiveCharResults.isInit()) {
-      lazyBinaryReadHiveCharResults.init((CharTypeInfo) primitiveTypeInfos[fieldIndex]);
+      lazyBinaryReadHiveCharResults.init((CharTypeInfo) typeInfos[fieldIndex]);
     }

     if (lazyBinaryReadHiveCharResults.readStringResults == null) {
@@ -559,7 +561,7 @@ public void readHiveVarchar(ReadHiveVarcharResults readHiveVarcharResults) throw
     LazyBinaryReadHiveVarcharResults lazyBinaryReadHiveVarcharResults =
         (LazyBinaryReadHiveVarcharResults) readHiveVarcharResults;

     if (!lazyBinaryReadHiveVarcharResults.isInit()) {
-      lazyBinaryReadHiveVarcharResults.init((VarcharTypeInfo) primitiveTypeInfos[fieldIndex]);
+      lazyBinaryReadHiveVarcharResults.init((VarcharTypeInfo) typeInfos[fieldIndex]);
     }

     if (lazyBinaryReadHiveVarcharResults.readStringResults == null) {
@@ -913,9 +915,10 @@ private boolean earlyReadHiveDecimal() throws EOFException {
     if (tempHiveDecimalWritable == null) {
       tempHiveDecimalWritable = new HiveDecimalWritable();
     }
-    tempHiveDecimalWritable.setFromBytes(bytes, saveStart, length);
+    LazyBinarySerDe.setFromBytes(bytes, saveStart, length,
+        tempHiveDecimalWritable);

-    saveDecimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfos[fieldIndex];
+    saveDecimalTypeInfo = (DecimalTypeInfo) typeInfos[fieldIndex];

     int precision = saveDecimalTypeInfo.getPrecision();
     int scale = saveDecimalTypeInfo.getScale();
@@ -939,4 +942,4 @@
     // Now return whether it is NULL or NOT NULL.
     return (saveDecimal == null);
   }
-}
\ No newline at end of file
+}

diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
index e0d9c0a..2d201ec 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
@@ -35,6 +35,7 @@
 import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
 import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
 import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
 import org.apache.hive.common.util.DateUtils;
@@ -742,4 +743,4 @@ public void writeHiveDecimal(HiveDecimal v) throws IOException {
       output.writeByte(nullOffset, nullByte);
     }
   }
-}
\ No newline at end of file
+}

diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
index a4323d1..1cae80c 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
@@ -760,11 +760,47 @@ public static TypeInfo getTypeInfoFromObjectInspector(ObjectInspector oi) {
     return result;
   }

+  public static ArrayList<TypeInfo> typeInfosFromStructObjectInspector(
+      StructObjectInspector structObjectInspector) {
+
+    List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
+    ArrayList<TypeInfo> typeInfoList = new ArrayList<TypeInfo>(fields.size());
+
+    for(StructField field : fields) {
+      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
+          field.getFieldObjectInspector().getTypeName());
+      typeInfoList.add(typeInfo);
+    }
+    return typeInfoList;
+  }
+
+  public static ArrayList<TypeInfo> typeInfosFromTypeNames(List<String> typeNames) {
+
+    ArrayList<TypeInfo> result = new ArrayList<TypeInfo>(typeNames.size());
+
+    for(int i = 0; i < typeNames.size(); i++) {
+      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
+      result.add(typeInfo);
+    }
+    return result;
+  }
+
   public static ArrayList<TypeInfo> getTypeInfosFromTypeString(String typeString) {
     TypeInfoParser parser = new TypeInfoParser(typeString);
     return parser.parseTypeInfos();
   }

+  public static String getTypesString(List<TypeInfo> typeInfos) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < typeInfos.size(); i++) {
+      if (i > 0) {
+        sb.append(":");
+      }
+      sb.append(typeInfos.get(i).getTypeName());
+    }
+    return sb.toString();
+  }
+
   public static TypeInfo getTypeInfoFromTypeString(String typeString) {
     TypeInfoParser parser = new TypeInfoParser(typeString);
     return parser.parseTypeInfos().get(0);
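A quick illustrative check of the TypeInfoUtils helpers added above (element types assumed to be String in and TypeInfo out, matching the surrounding code): parsed type names round-trip through getTypesString as a colon-delimited types string.

    import java.util.ArrayList;
    import java.util.Arrays;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class TypeInfoHelpersSketch {
      public static void main(String[] args) {
        ArrayList<TypeInfo> typeInfos = TypeInfoUtils.typeInfosFromTypeNames(
            Arrays.asList("int", "decimal(10,2)", "varchar(50)"));
        // Re-joins the type names with ":", e.g. "int:decimal(10,2):varchar(50)".
        System.out.println(TypeInfoUtils.getTypesString(typeInfos));
      }
    }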