diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 80e6aee..4c98a59 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -705,6 +705,7 @@ minillaplocal.query.files=\
   union_remove_26.q,\
   union_top_level.q,\
   vector_auto_smb_mapjoin_14.q,\
+  vector_char_varchar_1.q,\
   vector_complex_all.q,\
   vector_decimal_2.q,\
   vector_decimal_udf.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index 470e761..8ea625e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -179,6 +179,10 @@ public DataTypePhysicalVariation getDataTypePhysicalVariation() {
       return dataTypePhysicalVariation;
     }
 
+    public void setMaxLength(int maxLength) {
+      this.maxLength = maxLength;
+    }
+
     public int getMaxLength() {
       return maxLength;
     }
@@ -339,13 +343,25 @@ private void initTopLevelField(int logicalColumnIndex, int projectionColumnNum,
 
   /*
    * Initialize the conversion related arrays. Assumes initTopLevelField has already been called.
    */
-  private void addTopLevelConversion(int logicalColumnIndex) {
+  private void addTopLevelConversion(int logicalColumnIndex, TypeInfo targetTypeInfo) {
 
     final Field field = topLevelFields[logicalColumnIndex];
     field.setIsConvert(true);
 
     if (field.getIsPrimitive()) {
+      PrimitiveTypeInfo targetPrimitiveTypeInfo = (PrimitiveTypeInfo) targetTypeInfo;
+      switch (targetPrimitiveTypeInfo.getPrimitiveCategory()) {
+      case CHAR:
+        field.setMaxLength(((CharTypeInfo) targetPrimitiveTypeInfo).getLength());
+        break;
+      case VARCHAR:
+        field.setMaxLength(((VarcharTypeInfo) targetPrimitiveTypeInfo).getLength());
+        break;
+      default:
+        // No additional data type specific setting.
+        break;
+      }
       field.setConversionWritable(
           VectorizedBatchUtil.getPrimitiveWritable(field.getPrimitiveCategory()));
     }
@@ -482,7 +498,7 @@ public void initConversion(TypeInfo[] targetTypeInfos,
 
       initTopLevelField(i, i, sourceTypeInfo, dataTypePhysicalVariations[i]);
 
       // UNDONE: No for List and Map; Yes for Struct and Union when field count different...
-      addTopLevelConversion(i);
+      addTopLevelConversion(i, targetTypeInfo);
       atLeastOneConvert = true;
     }
diff --git ql/src/test/queries/clientpositive/vector_char_varchar_1.q ql/src/test/queries/clientpositive/vector_char_varchar_1.q
new file mode 100644
index 0000000..58ec7a7
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_char_varchar_1.q
@@ -0,0 +1,30 @@
+set hive.cli.print.header=true;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.vectorized.execution.enabled=true;
+
+create table varchar_table(vs varchar(50));
+insert into table varchar_table
+  values ("blue"), ("a long sentence"), ("more sunshine and rain"), ("exactly 10"), ("longer 10 "), ("tells the truth");
+alter table varchar_table change column vs vs varchar(10);
+
+
+explain vectorization detail
+create table varchar_ctas_1 as select length(vs),reverse(vs) from varchar_table;
+
+create table varchar_ctas_1 as select length(vs),reverse(vs) from varchar_table;
+
+select * from varchar_ctas_1 order by 2;
+
+
+create table char_table(vs char(50));
+insert into table char_table
+  values (" yellow"), ("some words and spaces "), ("less sunshine and rain"), ("exactly 10"), ("sunlight moon"), ("begs the truth");
+alter table char_table change column vs vs char(10);
+
+explain vectorization detail
+create table char_ctas_1 as select length(vs),reverse(vs) from char_table;
+
+create table char_ctas_1 as select length(vs),reverse(vs) from char_table;
+
+select * from char_ctas_1 order by 2;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out
new file mode 100644
index 0000000..83473ad
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/vector_char_varchar_1.q.out
@@ -0,0 +1,296 @@
+PREHOOK: query: create table varchar_table(vs varchar(50))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_table
+POSTHOOK: query: create table varchar_table(vs varchar(50))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_table
+PREHOOK: query: insert into table varchar_table
+  values ("blue"), ("a long sentence"), ("more sunshine and rain"), ("exactly 10"), ("longer 10 "), ("tells the truth")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@varchar_table
+POSTHOOK: query: insert into table varchar_table
+  values ("blue"), ("a long sentence"), ("more sunshine and rain"), ("exactly 10"), ("longer 10 "), ("tells the truth")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@varchar_table
+POSTHOOK: Lineage: varchar_table.vs SCRIPT []
+_col0
+PREHOOK: query: alter table varchar_table change column vs vs varchar(10)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@varchar_table
+PREHOOK: Output: default@varchar_table
+POSTHOOK: query: alter table varchar_table change column vs vs varchar(10)
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@varchar_table
+POSTHOOK: Output: default@varchar_table
+PREHOOK: query: explain vectorization detail
+create table varchar_ctas_1 as select length(vs),reverse(vs) from varchar_table
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain vectorization detail
+create table varchar_ctas_1 as select length(vs),reverse(vs) from varchar_table
+POSTHOOK: type: CREATETABLE_AS_SELECT
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: varchar_table
+                  Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:vs:varchar(10), 1:ROW__ID:struct]
+                  Select Operator
+                    expressions: length(vs) (type: int), reverse(vs) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [2, 3]
+                        selectExpressions: StringLength(col 0:varchar(10)) -> 2:int, VectorUDFAdaptor(reverse(vs)) -> 3:string
+                    Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.varchar_ctas_1
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    includeColumns: [0]
+                    dataColumns: vs:varchar(10)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, string]
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+      Create Table Operator:
+        Create Table
+          columns: _c0 int, _c1 string
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.varchar_ctas_1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create table varchar_ctas_1 as select length(vs),reverse(vs) from varchar_table
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@varchar_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@varchar_ctas_1
+POSTHOOK: query: create table varchar_ctas_1 as select length(vs),reverse(vs) from varchar_table
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@varchar_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@varchar_ctas_1
+POSTHOOK: Lineage: varchar_ctas_1._c0 EXPRESSION [(varchar_table)varchar_table.FieldSchema(name:vs, type:varchar(10), comment:null), ]
+POSTHOOK: Lineage: varchar_ctas_1._c1 EXPRESSION [(varchar_table)varchar_table.FieldSchema(name:vs, type:varchar(10), comment:null), ]
+_c0 _c1
+PREHOOK: query: select * from varchar_ctas_1 order by 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_ctas_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_ctas_1 order by 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_ctas_1
+#### A masked pattern was here ####
+varchar_ctas_1._c0 varchar_ctas_1._c1
+10 01 regnol
+10 eht sllet
+10 01 yltcaxe
+4 eulb
+10 hsnus erom
+10 nes gnol a
+PREHOOK: query: create table char_table(vs char(50))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_table
+POSTHOOK: query: create table char_table(vs char(50))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_table
+PREHOOK: query: insert into table char_table
+  values (" yellow"), ("some words and spaces "), ("less sunshine and rain"), ("exactly 10"), ("sunlight moon"), ("begs the truth")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@char_table
+POSTHOOK: query: insert into table char_table
+  values (" yellow"), ("some words and spaces "), ("less sunshine and rain"), ("exactly 10"), ("sunlight moon"), ("begs the truth")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@char_table
+POSTHOOK: Lineage: char_table.vs SCRIPT []
+_col0
+PREHOOK: query: alter table char_table change column vs vs char(10)
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@char_table
+PREHOOK: Output: default@char_table
+POSTHOOK: query: alter table char_table change column vs vs char(10)
+POSTHOOK: type: ALTERTABLE_RENAMECOL
+POSTHOOK: Input: default@char_table
+POSTHOOK: Output: default@char_table
+PREHOOK: query: explain vectorization detail
+create table char_ctas_1 as select length(vs),reverse(vs) from char_table
+PREHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: query: explain vectorization detail
+create table char_ctas_1 as select length(vs),reverse(vs) from char_table
+POSTHOOK: type: CREATETABLE_AS_SELECT
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-4 depends on stages: Stage-2, Stage-0
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: char_table
+                  Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:vs:char(10), 1:ROW__ID:struct]
+                  Select Operator
+                    expressions: length(vs) (type: int), reverse(vs) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Select Vectorization:
+                        className: VectorSelectOperator
+                        native: true
+                        projectedOutputColumnNums: [2, 3]
+                        selectExpressions: StringLength(col 0:char(10)) -> 2:int, VectorUDFAdaptor(reverse(vs)) -> 3:string
+                    Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      File Sink Vectorization:
+                          className: VectorFileSinkOperator
+                          native: false
+                      Statistics: Num rows: 6 Data size: 564 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.char_ctas_1
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+                inputFormatFeatureSupport: [DECIMAL_64]
+                vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+                featureSupportInUse: []
+                inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: true
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    includeColumns: [0]
+                    dataColumns: vs:char(10)
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [bigint, string]
+
+  Stage: Stage-2
+    Dependency Collection
+
+  Stage: Stage-4
+      Create Table Operator:
+        Create Table
+          columns: _c0 int, _c1 string
+          input format: org.apache.hadoop.mapred.TextInputFormat
+          output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+          serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          name: default.char_ctas_1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+
+  Stage: Stage-0
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+PREHOOK: query: create table char_ctas_1 as select length(vs),reverse(vs) from char_table
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@char_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_ctas_1
+POSTHOOK: query: create table char_ctas_1 as select length(vs),reverse(vs) from char_table
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@char_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_ctas_1
+POSTHOOK: Lineage: char_ctas_1._c0 EXPRESSION [(char_table)char_table.FieldSchema(name:vs, type:char(10), comment:null), ]
+POSTHOOK: Lineage: char_ctas_1._c1 EXPRESSION [(char_table)char_table.FieldSchema(name:vs, type:char(10), comment:null), ]
+_c0 _c1
+PREHOOK: query: select * from char_ctas_1 order by 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_ctas_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_ctas_1 order by 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_ctas_1
+#### A masked pattern was here ####
+char_ctas_1._c0 char_ctas_1._c1
+10 01 yltcaxe
+10 hsnus ssel
+10 m thgilnus
+10 sdrow emos
+10 t eht sgeb
+8 wolley
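
For context on the VectorDeserializeRow change above: the new switch copies the length of the target char/varchar type into the field's maxLength so the conversion path can enforce truncation when a column has been altered to a narrower type, as in the new q file. Below is a minimal standalone sketch of that length-extraction pattern, written against Hive's public serde type classes; the class name MaxLengthSketch and the helper maxLengthOf() are illustrative only and are not part of this patch.

import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;

public class MaxLengthSketch {

  // Same shape as the switch added to addTopLevelConversion():
  // only CHAR and VARCHAR carry a type-specific maximum length.
  static int maxLengthOf(PrimitiveTypeInfo typeInfo) {
    switch (typeInfo.getPrimitiveCategory()) {
    case CHAR:
      return ((CharTypeInfo) typeInfo).getLength();
    case VARCHAR:
      return ((VarcharTypeInfo) typeInfo).getLength();
    default:
      return -1;  // no length restriction for other primitives
    }
  }

  public static void main(String[] args) {
    // The test alters varchar(50)/char(50) columns down to length 10, so values
    // written under the wider type must be converted against maxLength 10.
    PrimitiveTypeInfo varchar10 = TypeInfoFactory.getVarcharTypeInfo(10);
    PrimitiveTypeInfo char10 = TypeInfoFactory.getCharTypeInfo(10);

    String stored = "more sunshine and rain";

    // HiveVarchar/HiveChar enforce the supplied max length on construction.
    HiveVarchar vc = new HiveVarchar(stored, maxLengthOf(varchar10));
    HiveChar c = new HiveChar(stored, maxLengthOf(char10));

    System.out.println(vc.getValue());         // more sunsh
    System.out.println(c.getStrippedValue());  // more sunsh
  }
}

This matches the expected output in the new q.out, where reverse(vs) over the altered varchar(10)/char(10) columns operates on the truncated 10-character values (for example "hsnus erom").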