diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java
index 907a9b8..dd437a7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/BaseVectorizedColumnReader.java
@@ -93,6 +93,17 @@
   protected final ColumnDescriptor descriptor;
   protected final Type type;
 
+  /**
+   * Used for VectorizedDummyColumnReader.
+   */
+  public BaseVectorizedColumnReader() {
+    this.pageReader = null;
+    this.descriptor = null;
+    this.type = null;
+    this.dictionary = null;
+    this.maxDefLevel = -1;
+  }
+
   public BaseVectorizedColumnReader(
       ColumnDescriptor descriptor,
       PageReader pageReader,
@@ -156,10 +167,12 @@ private void initDataReader(Encoding dataEncoding, byte[] bytes, int offset, int
           "could not read page in col " + descriptor +
               " as the dictionary was missing for encoding " + dataEncoding);
     }
-    dataColumn = dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary);
+    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(descriptor.getType(),
+        dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary));
     this.isCurrentPageDictionaryEncoded = true;
   } else {
-    dataColumn = dataEncoding.getValuesReader(descriptor, VALUES);
+    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(descriptor.getType(),
+        dataEncoding.getValuesReader(descriptor, VALUES));
    this.isCurrentPageDictionaryEncoded = false;
  }
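The hunks above route every page decoder through ParquetDataColumnReaderFactory (added below), which wraps the real ValuesReader in a type-promoting decorator: after schema evolution, an int32 page can then serve bigint, float, or double reads. A minimal, self-contained sketch of that decorator idea follows; IntSource and WideningReader are illustrative stand-ins, not Hive or Parquet classes.

// Sketch only: mirrors how TypesFromInt32PageReader widens int32 values
// to the type the evolved table schema asks for.
interface IntSource {
  int readInteger();
}

final class WideningReader {
  private final IntSource real;

  WideningReader(IntSource real) {
    this.real = real;
  }

  // int32 widens losslessly to long and double, so requests for the
  // evolved logical type are served from the file's physical type.
  long readLong() {
    return real.readInteger();
  }

  double readDouble() {
    return real.readInteger();
  }

  public static void main(String[] args) {
    int[] page = {7, 8};
    int[] pos = {0};
    WideningReader r = new WideningReader(() -> page[pos[0]++]);
    System.out.println(r.readLong());   // 7
    System.out.println(r.readDouble()); // 8.0
  }
}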
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
new file mode 100644
index 0000000..93e0781
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.parquet.vector;
+
+import org.apache.parquet.column.values.ValuesReader;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.PrimitiveType;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * A Parquet file carries a self-describing schema which may differ from the schema the user
+ * requires (e.g. after schema evolution). This factory returns a reader that produces the
+ * user-required type on top of the reader for the underlying data.
+ */
+public final class ParquetDataColumnReaderFactory {
+
+  private ParquetDataColumnReaderFactory() {
+  }
+
+  /**
+   * The default data column reader, delegating to an existing Parquet page reader; works for
+   * both dictionary and non-dictionary encodings.
+   */
+  public abstract static class AbstractParquetDataColumnReader extends ValuesReader {
+    protected ValuesReader realReader;
+
+    public AbstractParquetDataColumnReader(ValuesReader realReader) {
+      this.realReader = realReader;
+    }
+
+    @Override
+    public void initFromPage(int valueCount, ByteBuffer byteBuffer, int offset) throws IOException {
+      realReader.initFromPage(valueCount, byteBuffer, offset);
+    }
+
+    /**
+     * @return the next boolean from the page
+     */
+    @Override
+    public boolean readBoolean() {
+      return realReader.readBoolean();
+    }
+
+    /**
+     * @return the next Binary from the page
+     */
+    @Override
+    public Binary readBytes() {
+      return realReader.readBytes();
+    }
+
+    /**
+     * @return the next float from the page
+     */
+    @Override
+    public float readFloat() {
+      return realReader.readFloat();
+    }
+
+    /**
+     * @return the next double from the page
+     */
+    @Override
+    public double readDouble() {
+      return realReader.readDouble();
+    }
+
+    /**
+     * @return the next integer from the page
+     */
+    @Override
+    public int readInteger() {
+      return realReader.readInteger();
+    }
+
+    /**
+     * @return the next long from the page
+     */
+    @Override
+    public long readLong() {
+      return realReader.readLong();
+    }
+
+    @Override
+    public int readValueDictionaryId() {
+      return realReader.readValueDictionaryId();
+    }
+
+    @Override
+    public void skip() {
+      realReader.skip();
+    }
+  }
+
+  /**
+   * The reader that reads the underlying int32 values. Consistent with
+   * ETypeConverter.EINT32_CONVERTER.
+   */
+  public static class TypesFromInt32PageReader extends AbstractParquetDataColumnReader {
+
+    public TypesFromInt32PageReader(ValuesReader realReader) {
+      super(realReader);
+    }
+
+    @Override
+    public long readLong() {
+      return realReader.readInteger();
+    }
+
+    @Override
+    public float readFloat() {
+      return realReader.readInteger();
+    }
+
+    @Override
+    public double readDouble() {
+      return realReader.readInteger();
+    }
+  }
+
+  /**
+   * The reader that reads the underlying int64 values. Consistent with
+   * ETypeConverter.EINT64_CONVERTER.
+   */
+  public static class TypesFromInt64PageReader extends AbstractParquetDataColumnReader {
+
+    public TypesFromInt64PageReader(ValuesReader realReader) {
+      super(realReader);
+    }
+
+    @Override
+    public float readFloat() {
+      return realReader.readLong();
+    }
+
+    @Override
+    public double readDouble() {
+      return realReader.readLong();
+    }
+  }
+
+  /**
+   * The reader that reads the underlying float values. Consistent with
+   * ETypeConverter.EFLOAT_CONVERTER.
+   */
+  public static class TypesFromFloatPageReader extends AbstractParquetDataColumnReader {
+
+    public TypesFromFloatPageReader(ValuesReader realReader) {
+      super(realReader);
+    }
+
+    @Override
+    public double readDouble() {
+      return realReader.readFloat();
+    }
+  }
+
+  public static ValuesReader getDataColumnReaderByType(
+      PrimitiveType.PrimitiveTypeName parquetType, ValuesReader realReader) throws IOException {
+    switch (parquetType) {
+    case INT32:
+      return new TypesFromInt32PageReader(realReader);
+    case INT64:
+      return new TypesFromInt64PageReader(realReader);
+    case FLOAT:
+      return new TypesFromFloatPageReader(realReader);
+    case BOOLEAN:
+    case BINARY:
+    case DOUBLE:
+    case INT96:
+    case FIXED_LEN_BYTE_ARRAY:
+      return realReader;
+    default:
+      throw new IOException("Invalid category " + parquetType);
+    }
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedDummyColumnReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedDummyColumnReader.java
new file mode 100644
index 0000000..ee1d692
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedDummyColumnReader.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.parquet.vector;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * A dummy vectorized Parquet reader to support schema evolution: it stands in for a column that
+ * does not exist in the file and returns NULL for every row.
+ */
+public class VectorizedDummyColumnReader extends BaseVectorizedColumnReader {
+
+  public VectorizedDummyColumnReader() {
+    super();
+  }
+
+  @Override
+  public void readBatch(int total, ColumnVector column, TypeInfo columnType) throws IOException {
+    Arrays.fill(column.isNull, true);
+    column.isRepeating = true;
+    column.noNulls = false;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
index 08ac57b..5e4a0e4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
@@ -1,9 +1,13 @@
 /*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *    http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -11,9 +15,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.hive.ql.io.parquet.vector;
 
-import com.google.common.annotations.VisibleForTesting;
+package org.apache.hadoop.hive.ql.io.parquet.vector;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -474,9 +477,13 @@ private VectorizedColumnReader buildVectorizedParquetReader(
       if (columnDescriptors == null || columnDescriptors.isEmpty()) {
         throw new RuntimeException(
             "Failed to find related Parquet column descriptor with type " + type);
-      } else {
+      }
+      if (fileSchema.getColumns().contains(descriptors.get(0))) {
         return new VectorizedPrimitiveColumnReader(descriptors.get(0),
             pages.getPageReader(descriptors.get(0)), skipTimestampConversion, type);
+      } else {
+        // Support for schema evolution: the column is absent from this file, so emit NULLs.
+        return new VectorizedDummyColumnReader();
       }
     case STRUCT:
       StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
index 9e414dc..1988d4b 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
@@ -55,11 +55,16 @@ public static void cleanup() throws IOException {
   @Test
   public void testIntRead() throws Exception {
     intRead(isDictionaryEncoding);
+    longReadInt(isDictionaryEncoding);
+    floatReadInt(isDictionaryEncoding);
+    doubleReadInt(isDictionaryEncoding);
   }
 
   @Test
   public void testLongRead() throws Exception {
     longRead(isDictionaryEncoding);
+    floatReadLong(isDictionaryEncoding);
+    doubleReadLong(isDictionaryEncoding);
   }
 
   @Test
@@ -70,6 +75,7 @@ public void testDoubleRead() throws Exception {
   @Test
   public void testFloatRead() throws Exception {
     floatRead(isDictionaryEncoding);
+    doubleReadFloat(isDictionaryEncoding);
   }
 
   @Test
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
index 5d3ebd6..a81d6f9 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java
@@ -26,7 +26,6 @@
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
-import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
 import org.apache.hadoop.hive.ql.io.IOConstants;
@@ -358,13 +357,87 @@ private static StructObjectInspector createStructObjectInspector(Configuration c
     return new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo);
   }
 
-  protected void intRead(boolean isDictionaryEncoding) throws InterruptedException, HiveException, IOException {
-    Configuration conf = new Configuration();
-    conf.set(IOConstants.COLUMNS,"int32_field");
-    conf.set(IOConstants.COLUMNS_TYPES,"int");
+  protected void floatReadInt(boolean isDictionaryEncoding) throws InterruptedException,
+      HiveException, IOException {
+    conf.set(IOConstants.COLUMNS, "int32_field");
+    conf.set(IOConstants.COLUMNS_TYPES, "float");
+    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    VectorizedParquetRecordReader reader = createTestParquetReader("message test { required int32" +
+        " int32_field;}", conf);
+    VectorizedRowBatch previous = reader.createValue();
+    try {
+      int c = 0;
+      while (reader.next(NullWritable.get(), previous)) {
+        DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0];
+        assertTrue(vector.noNulls);
+        for (int i = 0; i < vector.vector.length; i++) {
+          if (c == nElements) {
+            break;
+          }
+          assertEquals("Failed at " + c, getIntValue(isDictionaryEncoding, c), vector.vector[i], 0);
+          assertFalse(vector.isNull[i]);
+          c++;
+        }
+      }
+      assertEquals(nElements, c);
+    } finally {
+      reader.close();
+    }
+  }
+
+  protected void doubleReadInt(boolean isDictionaryEncoding) throws InterruptedException,
+      HiveException, IOException {
+    conf.set(IOConstants.COLUMNS, "int32_field");
+    conf.set(IOConstants.COLUMNS_TYPES, "double");
     conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
     conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
     VectorizedParquetRecordReader reader =
+        createTestParquetReader("message test { required int32 int32_field;}", conf);
+    VectorizedRowBatch previous = reader.createValue();
+    try {
+      int c = 0;
+      while (reader.next(NullWritable.get(), previous)) {
+        DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0];
+        assertTrue(vector.noNulls);
+        for (int i = 0; i < vector.vector.length; i++) {
+          if (c == nElements) {
+            break;
+          }
+          assertEquals("Failed at " + c, getIntValue(isDictionaryEncoding, c), vector.vector[i], 0);
+          assertFalse(vector.isNull[i]);
+          c++;
+        }
+      }
+      assertEquals(nElements, c);
+    } finally {
+      reader.close();
+    }
+  }
+
+  protected void longReadInt(boolean isDictionaryEncoding) throws InterruptedException,
+      HiveException, IOException {
+    Configuration c = new Configuration();
+    c.set(IOConstants.COLUMNS, "int32_field");
+    c.set(IOConstants.COLUMNS_TYPES, "bigint");
+    c.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    c.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    intRead(isDictionaryEncoding, c);
+  }
+
+  protected void intRead(boolean isDictionaryEncoding) throws InterruptedException,
+      HiveException, IOException {
+    Configuration c = new Configuration();
+    c.set(IOConstants.COLUMNS, "int32_field");
+    c.set(IOConstants.COLUMNS_TYPES, "int");
+    c.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    c.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    intRead(isDictionaryEncoding, c);
+  }
+
+  private void intRead(boolean isDictionaryEncoding, Configuration conf) throws
+      InterruptedException, HiveException, IOException {
+    VectorizedParquetRecordReader reader =
        createTestParquetReader("message test { required int32 int32_field;}", conf);
     VectorizedRowBatch previous = reader.createValue();
     try {
@@ -387,12 +460,76 @@ protected void intRead(boolean isDictionaryEncoding) throws InterruptedException
     }
   }
 
+  protected void floatReadLong(boolean isDictionaryEncoding) throws Exception {
+    Configuration c = new Configuration();
+    c.set(IOConstants.COLUMNS, "int64_field");
+    c.set(IOConstants.COLUMNS_TYPES, "float");
+    c.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    c.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    VectorizedParquetRecordReader reader =
+        createTestParquetReader("message test { required int64 int64_field;}", c);
+    VectorizedRowBatch previous = reader.createValue();
+    try {
+      int count = 0;
+      while (reader.next(NullWritable.get(), previous)) {
+        DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0];
+        assertTrue(vector.noNulls);
+        for (int i = 0; i < vector.vector.length; i++) {
+          if (count == nElements) {
+            break;
+          }
+          assertEquals("Failed at " + count, getLongValue(isDictionaryEncoding, count),
+              vector.vector[i], 0);
+          assertFalse(vector.isNull[i]);
+          count++;
+        }
+      }
+      assertEquals(nElements, count);
+    } finally {
+      reader.close();
+    }
+  }
+
+  protected void doubleReadLong(boolean isDictionaryEncoding) throws Exception {
+    Configuration c = new Configuration();
+    c.set(IOConstants.COLUMNS, "int64_field");
+    c.set(IOConstants.COLUMNS_TYPES, "double");
+    c.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    c.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    VectorizedParquetRecordReader reader =
+        createTestParquetReader("message test { required int64 int64_field;}", c);
+    VectorizedRowBatch previous = reader.createValue();
+    try {
+      int count = 0;
+      while (reader.next(NullWritable.get(), previous)) {
+        DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0];
+        assertTrue(vector.noNulls);
+        for (int i = 0; i < vector.vector.length; i++) {
+          if (count == nElements) {
+            break;
+          }
+          assertEquals("Failed at " + count, getLongValue(isDictionaryEncoding, count),
+              vector.vector[i], 0);
+          assertFalse(vector.isNull[i]);
+          count++;
+        }
+      }
+      assertEquals(nElements, count);
+    } finally {
+      reader.close();
+    }
+  }
+
   protected void longRead(boolean isDictionaryEncoding) throws Exception {
-    Configuration conf = new Configuration();
-    conf.set(IOConstants.COLUMNS, "int64_field");
-    conf.set(IOConstants.COLUMNS_TYPES, "bigint");
-    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
-    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    Configuration c = new Configuration();
+    c.set(IOConstants.COLUMNS, "int64_field");
+    c.set(IOConstants.COLUMNS_TYPES, "bigint");
+    c.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    c.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    longRead(isDictionaryEncoding, c);
+  }
+
+  private void longRead(boolean isDictionaryEncoding, Configuration conf) throws Exception {
     VectorizedParquetRecordReader reader =
        createTestParquetReader("message test { required int64 int64_field;}", conf);
     VectorizedRowBatch previous = reader.createValue();
@@ -417,11 +554,15 @@ protected void longRead(boolean isDictionaryEncoding) throws Exception {
   }
 
   protected void doubleRead(boolean isDictionaryEncoding) throws Exception {
-    Configuration conf = new Configuration();
-    conf.set(IOConstants.COLUMNS, "double_field");
-    conf.set(IOConstants.COLUMNS_TYPES, "double");
-    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
-    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    Configuration c = new Configuration();
+    c.set(IOConstants.COLUMNS, "double_field");
+    c.set(IOConstants.COLUMNS_TYPES, "double");
+    c.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    c.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    doubleRead(isDictionaryEncoding, c);
+  }
+
+  private void doubleRead(boolean isDictionaryEncoding, Configuration conf) throws Exception {
     VectorizedParquetRecordReader reader =
        createTestParquetReader("message test { required double double_field;}", conf);
     VectorizedRowBatch previous = reader.createValue();
@@ -447,13 +588,26 @@ protected void doubleRead(boolean isDictionaryEncoding) throws Exception {
   }
 
   protected void floatRead(boolean isDictionaryEncoding) throws Exception {
-    Configuration conf = new Configuration();
-    conf.set(IOConstants.COLUMNS, "float_field");
-    conf.set(IOConstants.COLUMNS_TYPES, "float");
-    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
-    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    Configuration c = new Configuration();
+    c.set(IOConstants.COLUMNS, "float_field");
+    c.set(IOConstants.COLUMNS_TYPES, "float");
+    c.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    c.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    floatRead(isDictionaryEncoding, c);
+  }
+
+  protected void doubleReadFloat(boolean isDictionaryEncoding) throws Exception {
+    Configuration c = new Configuration();
+    c.set(IOConstants.COLUMNS, "float_field");
+    c.set(IOConstants.COLUMNS_TYPES, "double");
+    c.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
+    c.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
+    floatRead(isDictionaryEncoding, c);
+  }
+
+  private void floatRead(boolean isDictionaryEncoding, Configuration conf) throws Exception {
     VectorizedParquetRecordReader reader =
-        createTestParquetReader("message test { required float float_field;}", conf);
+      createTestParquetReader("message test { required float float_field;}", conf);
     VectorizedRowBatch previous = reader.createValue();
     try {
       int c = 0;
@@ -465,7 +619,7 @@ protected void floatRead(boolean isDictionaryEncoding) throws Exception {
           break;
         }
         assertEquals("Failed at " + c, getFloatValue(isDictionaryEncoding, c), vector.vector[i],
-            0);
+          0);
         assertFalse(vector.isNull[i]);
         c++;
       }
diff --git ql/src/test/queries/clientpositive/schema_evol_par_vec_table.q ql/src/test/queries/clientpositive/schema_evol_par_vec_table.q
new file mode 100644
index 0000000..9561521
--- /dev/null
+++ ql/src/test/queries/clientpositive/schema_evol_par_vec_table.q
@@ -0,0 +1,73 @@
+set hive.fetch.task.conversion=none;
+set hive.vectorized.execution.enabled=true;
+
+drop table test_alter;
+drop table test_alter2;
+drop table test_alter3;
+
+
+create table test_alter (id string) stored as parquet;
+insert into test_alter values ('1'), ('2'), ('3');
+select * from test_alter;
+
+-- add new column -> empty col values should return NULL
+alter table test_alter add columns (newCol string);
+select * from test_alter;
+
+-- insert data into new column -> new data should be returned
+insert into test_alter values ('4', '100');
+select * from test_alter;
+
+-- remove the newly added column
+-- this works in vectorized execution
+alter table test_alter replace columns (id string);
+select * from test_alter;
+
+-- add column using replace columns syntax
+alter table test_alter replace columns (id string, id2 string);
+-- this surprisingly doesn't return the 100 added to the 4th row above
+select * from test_alter;
+insert into test_alter values ('5', '100');
+select * from test_alter;
+
+-- use the same column name and datatype
+alter table test_alter replace columns (id string, id2 string);
+select * from test_alter;
+
+-- change string to char
+alter table test_alter replace columns (id char(10), id2 string);
+select * from test_alter;
+
+-- change string to varchar
+alter table test_alter replace columns (id string, id2 string);
+alter table test_alter replace columns (id varchar(10), id2 string);
+select * from test_alter;
+
+-- change column type and column name
+alter table test_alter replace columns (id string, id2 string);
+alter table test_alter replace columns (idv varchar(10), id2 string);
+-- this returns all NULLs in col1 but vectorized execution fails
+select * from test_alter;
+
+-- test int to long type conversion
+create table test_alter2 (id int) stored as parquet;
+insert into test_alter2 values (1);
+alter table test_alter2 replace columns (id bigint);
+select * from test_alter2;
+
+-- test float to double type conversion
+drop table test_alter2;
+create table test_alter2 (id float) stored as parquet;
+insert into test_alter2 values (1.5);
+alter table test_alter2 replace columns (id double);
+select * from test_alter2;
+
+-- test integer types upconversion
+create table test_alter3 (id1 tinyint, id2 smallint, id3 int, id4 bigint) stored as parquet;
+insert into test_alter3 values (10, 20, 30, 40);
+alter table test_alter3 replace columns (id1 smallint, id2 int, id3 bigint, id4 decimal(10,4));
+-- this fails, mostly due to bigint to decimal
+-- select * from test_alter3;
+select id1, id2, id3 from test_alter3;
+
+
diff --git ql/src/test/results/clientpositive/schema_evol_par_vec_table.q.out ql/src/test/results/clientpositive/schema_evol_par_vec_table.q.out
new file mode 100644
index 0000000..a6128b6
--- /dev/null
+++ ql/src/test/results/clientpositive/schema_evol_par_vec_table.q.out
@@ -0,0 +1,357 @@
+PREHOOK: query: drop table test_alter
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table test_alter
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table test_alter2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table test_alter2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table test_alter3
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table test_alter3
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table test_alter (id string) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: create table test_alter (id string) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: insert into test_alter values ('1'), ('2'), ('3')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: insert into test_alter values ('1'), ('2'), ('3')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_alter
+POSTHOOK: Lineage: test_alter.id SCRIPT []
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+1
+2
+3
+PREHOOK: query: alter table test_alter add columns (newCol string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@test_alter
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: alter table test_alter add columns (newCol string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@test_alter
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+1	NULL
+2	NULL
+3	NULL
+PREHOOK: query: insert into test_alter values ('4', '100')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: insert into test_alter values ('4', '100')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_alter
+POSTHOOK: Lineage: test_alter.id SCRIPT []
+POSTHOOK: Lineage: test_alter.newcol SCRIPT []
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+1	NULL
+2	NULL
+3	NULL
+4	100
+PREHOOK: query: alter table test_alter replace columns (id string)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: alter table test_alter replace columns (id string)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+1
+2
+3
+4
+PREHOOK: query: alter table test_alter replace columns (id string, id2 string)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: alter table test_alter replace columns (id string, id2 string)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+1	NULL
+2	NULL
+3	NULL
+4	NULL
+PREHOOK: query: insert into test_alter values ('5', '100')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: insert into test_alter values ('5', '100')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_alter
+POSTHOOK: Lineage: test_alter.id SCRIPT []
+POSTHOOK: Lineage: test_alter.id2 SCRIPT []
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+5	100
+1	NULL
+2	NULL
+3	NULL
+4	NULL
+PREHOOK: query: alter table test_alter replace columns (id string, id2 string)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: alter table test_alter replace columns (id string, id2 string)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+5	100
+1	NULL
+2	NULL
+3	NULL
+4	NULL
+PREHOOK: query: alter table test_alter replace columns (id char(10), id2 string)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: alter table test_alter replace columns (id char(10), id2 string)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+5	100
+1	NULL
+2	NULL
+3	NULL
+4	NULL
+PREHOOK: query: alter table test_alter replace columns (id string, id2 string)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: alter table test_alter replace columns (id string, id2 string)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: alter table test_alter replace columns (id varchar(10), id2 string)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: alter table test_alter replace columns (id varchar(10), id2 string)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+5	100
+1	NULL
+2	NULL
+3	NULL
+4	NULL
+PREHOOK: query: alter table test_alter replace columns (id string, id2 string)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: alter table test_alter replace columns (id string, id2 string)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: alter table test_alter replace columns (idv varchar(10), id2 string)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter
+PREHOOK: Output: default@test_alter
+POSTHOOK: query: alter table test_alter replace columns (idv varchar(10), id2 string)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter
+POSTHOOK: Output: default@test_alter
+PREHOOK: query: select * from test_alter
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter
+#### A masked pattern was here ####
+NULL	100
+NULL	NULL
+NULL	NULL
+NULL	NULL
+NULL	NULL
+PREHOOK: query: create table test_alter2 (id int) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_alter2
+POSTHOOK: query: create table test_alter2 (id int) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_alter2
+PREHOOK: query: insert into test_alter2 values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_alter2
+POSTHOOK: query: insert into test_alter2 values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_alter2
+POSTHOOK: Lineage: test_alter2.id SCRIPT []
+PREHOOK: query: alter table test_alter2 replace columns (id bigint)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter2
+PREHOOK: Output: default@test_alter2
+POSTHOOK: query: alter table test_alter2 replace columns (id bigint)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter2
+POSTHOOK: Output: default@test_alter2
+PREHOOK: query: select * from test_alter2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter2
+#### A masked pattern was here ####
+1
+PREHOOK: query: drop table test_alter2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_alter2
+PREHOOK: Output: default@test_alter2
+POSTHOOK: query: drop table test_alter2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_alter2
+POSTHOOK: Output: default@test_alter2
+PREHOOK: query: create table test_alter2 (id float) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_alter2
+POSTHOOK: query: create table test_alter2 (id float) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_alter2
+PREHOOK: query: insert into test_alter2 values (1.5)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_alter2
+POSTHOOK: query: insert into test_alter2 values (1.5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_alter2
+POSTHOOK: Lineage: test_alter2.id SCRIPT []
+PREHOOK: query: alter table test_alter2 replace columns (id double)
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter2
+PREHOOK: Output: default@test_alter2
+POSTHOOK: query: alter table test_alter2 replace columns (id double)
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter2
+POSTHOOK: Output: default@test_alter2
+PREHOOK: query: select * from test_alter2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from test_alter2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter2
+#### A masked pattern was here ####
+1.5
+PREHOOK: query: create table test_alter3 (id1 tinyint, id2 smallint, id3 int, id4 bigint) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_alter3
+POSTHOOK: query: create table test_alter3 (id1 tinyint, id2 smallint, id3 int, id4 bigint) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_alter3
+PREHOOK: query: insert into test_alter3 values (10, 20, 30, 40)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_alter3
+POSTHOOK: query: insert into test_alter3 values (10, 20, 30, 40)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_alter3
+POSTHOOK: Lineage: test_alter3.id1 SCRIPT []
+POSTHOOK: Lineage: test_alter3.id2 SCRIPT []
+POSTHOOK: Lineage: test_alter3.id3 SCRIPT []
+POSTHOOK: Lineage: test_alter3.id4 SCRIPT []
+PREHOOK: query: alter table test_alter3 replace columns (id1 smallint, id2 int, id3 bigint, id4 decimal(10,4))
+PREHOOK: type: ALTERTABLE_REPLACECOLS
+PREHOOK: Input: default@test_alter3
+PREHOOK: Output: default@test_alter3
+POSTHOOK: query: alter table test_alter3 replace columns (id1 smallint, id2 int, id3 bigint, id4 decimal(10,4))
+POSTHOOK: type: ALTERTABLE_REPLACECOLS
+POSTHOOK: Input: default@test_alter3
+POSTHOOK: Output: default@test_alter3
+PREHOOK: query: select id1, id2, id3 from test_alter3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_alter3
+#### A masked pattern was here ####
+POSTHOOK: query: select id1, id2, id3 from test_alter3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_alter3
+#### A masked pattern was here ####
+10	20	30
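The q.out above confirms the dummy-reader behavior: a column added after a file was written comes back as NULL for every row of that file. A minimal sketch of what VectorizedDummyColumnReader.readBatch does per batch; NullBatch is an illustrative stand-in, not Hive's ColumnVector.

import java.util.Arrays;

// Sketch only: models the three flags VectorizedDummyColumnReader sets on the
// real org.apache.hadoop.hive.ql.exec.vector.ColumnVector.
final class NullBatch {
  final boolean[] isNull;
  boolean isRepeating;
  boolean noNulls;

  NullBatch(int size) {
    isNull = new boolean[size];
  }

  void readBatch() {
    Arrays.fill(isNull, true); // every row in the batch is NULL
    isRepeating = true;        // a single (NULL) entry stands for the whole batch
    noNulls = false;           // tell consumers the null bitmap must be checked
  }

  public static void main(String[] args) {
    NullBatch batch = new NullBatch(1024);
    batch.readBatch();
    System.out.println(batch.isNull[0] && batch.isRepeating && !batch.noNulls); // true
  }
}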