diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index c71c540..377ef60 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -746,6 +746,7 @@ minillaplocal.query.files=\
   vector_join_filters.q,\
   vector_leftsemi_mapjoin.q,\
   vector_like_2.q,\
+  vector_llap_io_data_conversion.q,\
   vector_llap_text_1.q,\
   vector_mapjoin_reduce.q,\
   vector_null_map.q,\
diff --git llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index cb57a11..eb6629a 100644
--- llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -274,6 +274,11 @@ public void start() {
 
   private boolean checkOrcSchemaEvolution() {
     SchemaEvolution evolution = rp.getSchemaEvolution();
+    if (evolution.hasConversion()) {
+
+      // We do not support data type conversion when reading encoded ORC data.
+      return false;
+    }
     // TODO: should this just use physical IDs?
     for (int i = 0; i < includes.getReaderLogicalColumnIds().size(); ++i) {
       int projectedColId = includes.getReaderLogicalColumnIds().get(i);
diff --git ql/src/test/queries/clientpositive/vector_llap_io_data_conversion.q ql/src/test/queries/clientpositive/vector_llap_io_data_conversion.q
new file mode 100644
index 0000000..f40c4b9
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_llap_io_data_conversion.q
@@ -0,0 +1,19 @@
+--! qt:dataset:alltypesorc
+set hive.explain.user=false;
+SET hive.vectorized.execution.enabled=true;
+
+set hive.llap.io.enabled=true;
+set hive.llap.io.encode.enabled=true;
+
+create table varchar_single_partition(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50))
+  partitioned by(s varchar(50)) stored as orc;
+insert into table varchar_single_partition partition(s='positive') select ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1 from alltypesorc where cint>0 limit 10;
+insert into table varchar_single_partition partition(s='negative') select ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1 from alltypesorc where cint<0 limit 10;
+alter table varchar_single_partition change column vs vs varchar(10);
+
+create table varchar_ctas_1 stored as orc as select vs, length(vs) as c1,reverse(vs) as c2 from varchar_single_partition where s='positive';
+
+explain vectorization detail
+select * from varchar_ctas_1 order by vs, c1, c2;
+
+select * from varchar_ctas_1 order by vs, c1, c2;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out
new file mode 100644
index 0000000..5e70cb3
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out
@@ -0,0 +1,187 @@
+PREHOOK: query: create table varchar_single_partition(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50))
+  partitioned by(s varchar(50)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_single_partition
+POSTHOOK: query: create table varchar_single_partition(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50))
+  partitioned by(s varchar(50)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_single_partition
+PREHOOK: query: insert into table varchar_single_partition partition(s='positive') select ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1 from alltypesorc where cint>0 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@varchar_single_partition@s=positive
+POSTHOOK: query: insert into table varchar_single_partition partition(s='positive') select ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1 from alltypesorc where cint>0 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@varchar_single_partition@s=positive
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=positive).vb EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=positive).vd EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=positive).vf EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=positive).vi EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=positive).vs EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=positive).vsi EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=positive).vt EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: insert into table varchar_single_partition partition(s='negative') select ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1 from alltypesorc where cint<0 limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@varchar_single_partition@s=negative
+POSTHOOK: query: insert into table varchar_single_partition partition(s='negative') select ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1 from alltypesorc where cint<0 limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@varchar_single_partition@s=negative
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=negative).vb EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=negative).vd EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=negative).vf EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=negative).vi EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=negative).vs EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=negative).vsi EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: varchar_single_partition PARTITION(s=negative).vt EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+PREHOOK: query: alter table varchar_single_partition change column vs vs varchar(10)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@varchar_single_partition
+PREHOOK: Output: default@varchar_single_partition
+POSTHOOK: query: alter table varchar_single_partition change column vs vs varchar(10)
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@varchar_single_partition
+POSTHOOK: Output: default@varchar_single_partition
+PREHOOK: query: create table varchar_ctas_1 stored as orc as select vs, length(vs) as c1,reverse(vs) as c2 from varchar_single_partition where s='positive'
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@varchar_single_partition
+PREHOOK: Input: default@varchar_single_partition@s=positive
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_ctas_1
+POSTHOOK: query: create table varchar_ctas_1 stored as orc as select vs, length(vs) as c1,reverse(vs) as c2 from varchar_single_partition where s='positive'
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@varchar_single_partition
+POSTHOOK: Input: default@varchar_single_partition@s=positive
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_ctas_1
+POSTHOOK: Lineage: varchar_ctas_1.c1 EXPRESSION [(varchar_single_partition)varchar_single_partition.FieldSchema(name:vs, type:varchar(10), comment:null), ]
+POSTHOOK: Lineage: varchar_ctas_1.c2 EXPRESSION [(varchar_single_partition)varchar_single_partition.FieldSchema(name:vs, type:varchar(10), comment:null), ]
+POSTHOOK: Lineage: varchar_ctas_1.vs SIMPLE [(varchar_single_partition)varchar_single_partition.FieldSchema(name:vs, type:varchar(10), comment:null), ]
+PREHOOK: query: explain vectorization detail
+select * from varchar_ctas_1 order by vs, c1, c2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select * from varchar_ctas_1 order by vs, c1, c2
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: varchar_ctas_1
+                  Statistics: Num rows: 10 Data size: 2820 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:vs:varchar(10), 1:c1:int, 2:c2:string, 3:ROW__ID:struct]
+                  Select Operator
+                    expressions: vs (type: varchar(10)), c1 (type: int), c2 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [0, 1, 2]
+                    Statistics: Num rows: 10 Data size: 2820 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: string)
+                      sort order: +++
+                      Reduce Sink Vectorization:
+                          className: VectorReduceSinkObjectHashOperator
+                          keyColumnNums: [0, 1, 2]
+                          native: true
+                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          valueColumnNums: []
+                      Statistics: Num rows: 10 Data size: 2820 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                inputFormatFeatureSupport: []
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: true
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: vs:varchar(10), c1:int, c2:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    dataColumns: KEY.reducesinkkey0:varchar(10), KEY.reducesinkkey1:int, KEY.reducesinkkey2:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: varchar(10)), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2]
+                Statistics: Num rows: 10 Data size: 2820 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 10 Data size: 2820 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from varchar_ctas_1 order by vs, c1, c2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_ctas_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_ctas_1 order by vs, c1, c2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_ctas_1
+#### A masked pattern was here ####
+cvLH6Eat2y	16	y2taE6HLvc
+cvLH6Eat2y	16	y2taE6HLvc
+cvLH6Eat2y	16	y2taE6HLvc
+cvLH6Eat2y	16	y2taE6HLvc
+cvLH6Eat2y	16	y2taE6HLvc
+cvLH6Eat2y	16	y2taE6HLvc
+cvLH6Eat2y	16	y2taE6HLvc
+cvLH6Eat2y	16	y2taE6HLvc
+cvLH6Eat2y	16	y2taE6HLvc
+cvLH6Eat2y	16	y2taE6HLvc