diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 9007771..b2b1a41 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1134,7 +1134,7 @@ public float getProgress() throws IOException { @Override public ObjectInspector getObjectInspector() { - return ((StructObjectInspector) reader.getObjectInspector()) + return ((StructObjectInspector) records.getObjectInspector()) .getAllStructFieldRefs().get(OrcRecordUpdater.ROW) .getFieldObjectInspector(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java index 74ea23b..cff7915 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.RecordIdentifier; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; @@ -37,9 +38,10 @@ import org.apache.hadoop.io.LongWritable; import java.io.IOException; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; +import java.util.Deque; import java.util.List; import java.util.Map; import java.util.TreeMap; @@ -627,8 +629,17 @@ public ObjectInspector getObjectInspector() { // Parse the configuration parameters ArrayList columnNames = new ArrayList(); + Deque virtualColumns = new ArrayDeque(); if (columnNameProperty != null && columnNameProperty.length() > 0) { - Collections.addAll(columnNames, 
columnNameProperty.split(",")); + //Collections.addAll(columnNames, columnNameProperty.split(",")); + String[] colNames = columnNameProperty.split(","); + for (int i = 0; i < colNames.length; i++) { + if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(colNames[i])) { + virtualColumns.addLast(i); + } else { + columnNames.add(colNames[i]); + } + } } if (columnTypeProperty == null) { // Default type: all string @@ -644,6 +655,10 @@ public ObjectInspector getObjectInspector() { ArrayList fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + while (virtualColumns.size() > 0) { + // Unbox to int so List.remove(int index) is selected; a boxed Integer resolves to remove(Object) and removes nothing. + fieldTypes.remove((int) virtualColumns.removeLast()); + } StructTypeInfo rowType = new StructTypeInfo(); rowType.setAllStructFieldNames(columnNames); rowType.setAllStructFieldTypeInfos(fieldTypes); diff --git ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java index 55392c9..a15a7a7 100644 --- ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java +++ ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java @@ -73,6 +73,7 @@ import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -1266,6 +1267,8 @@ JobConf createMockExecutionEnvironment(Path workDir, } conf.set("hive.io.file.readcolumn.ids", columnIds.toString()); conf.set("partition_columns", "p"); + conf.set(serdeConstants.LIST_COLUMNS, columnNames.toString()); + conf.set(serdeConstants.LIST_COLUMN_TYPES, columnTypes.toString()); MockFileSystem fs = (MockFileSystem) warehouseDir.getFileSystem(conf); fs.clear(); diff --git ql/src/test/queries/clientpositive/acid_vectorization.q 
ql/src/test/queries/clientpositive/acid_vectorization.q index 3f386c9..4b11412 100644 --- ql/src/test/queries/clientpositive/acid_vectorization.q +++ ql/src/test/queries/clientpositive/acid_vectorization.q @@ -12,3 +12,5 @@ set hive.vectorized.execution.enabled=true; update acid_vectorized set b = 'foo' where b = 'bar'; set hive.vectorized.execution.enabled=true; delete from acid_vectorized where b = 'foo'; +set hive.vectorized.execution.enabled=true; +select a, b from acid_vectorized order by a, b; diff --git ql/src/test/results/clientpositive/acid_vectorization.q.out ql/src/test/results/clientpositive/acid_vectorization.q.out index 18dada5..1792979 100644 --- ql/src/test/results/clientpositive/acid_vectorization.q.out +++ ql/src/test/results/clientpositive/acid_vectorization.q.out @@ -42,3 +42,21 @@ POSTHOOK: query: delete from acid_vectorized where b = 'foo' POSTHOOK: type: QUERY POSTHOOK: Input: default@acid_vectorized POSTHOOK: Output: default@acid_vectorized +PREHOOK: query: select a, b from acid_vectorized order by a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized +#### A masked pattern was here #### +POSTHOOK: query: select a, b from acid_vectorized order by a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized +#### A masked pattern was here #### +-1073279343 oj1YrV5Wa +-1073051226 A34p7oRr2WvUJNf +-1072910839 0iqrc5 +-1072081801 dPkN74F7 +-1072076362 2uLyD28144vklju213J1mr +-1071480828 aw724t8c5558x2xneC624 +-1071363017 Anj0oF +-1070883071 0ruyd6Y50JpdGRf6HqD +-1070551679 iUR3Q +-1069736047 k17Am8uPHWk02cEf1jet diff --git ql/src/test/results/clientpositive/tez/acid_vectorization.q.out ql/src/test/results/clientpositive/tez/acid_vectorization.q.out new file mode 100644 index 0000000..6f0d0bd --- /dev/null +++ ql/src/test/results/clientpositive/tez/acid_vectorization.q.out @@ -0,0 +1,62 @@ +PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC +PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:default +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@acid_vectorized +PREHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@acid_vectorized +POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert into table acid_vectorized values (1, 'bar') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: insert into table acid_vectorized values (1, 'bar') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@acid_vectorized +POSTHOOK: Lineage: acid_vectorized.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: update acid_vectorized set b = 'foo' where b = 'bar' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: update acid_vectorized set b = 'foo' where b = 'bar' +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@acid_vectorized +POSTHOOK: Output: default@acid_vectorized +PREHOOK: query: delete from acid_vectorized where b = 'foo' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized +PREHOOK: Output: default@acid_vectorized +POSTHOOK: query: delete from acid_vectorized where b = 'foo' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized +POSTHOOK: Output: default@acid_vectorized +PREHOOK: query: select a, b from acid_vectorized order by a, b +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_vectorized +#### A masked pattern was here #### +POSTHOOK: query: select a, b from acid_vectorized order by a, b +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_vectorized +#### A masked pattern was here #### +-1073279343 oj1YrV5Wa +-1073051226 A34p7oRr2WvUJNf +-1072910839 0iqrc5 +-1072081801 dPkN74F7 +-1072076362 2uLyD28144vklju213J1mr +-1071480828 aw724t8c5558x2xneC624 +-1071363017 Anj0oF +-1070883071 0ruyd6Y50JpdGRf6HqD +-1070551679 iUR3Q +-1069736047 k17Am8uPHWk02cEf1jet