diff --git data/files/passwd_null data/files/passwd_null new file mode 100644 index 0000000..de9abe2 --- /dev/null +++ data/files/passwd_null @@ -0,0 +1,5 @@ +root:x:0:0:root:/root:\N +bin:x:1:1:bin:/bin:\N +daemon:x:2:2:daemon:/sbin:\N +adm:x:3:4:adm:/var/adm:\N +lp:x:4:7:lp:/var/spool/lpd:\N diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 458ad21..a3f174f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampUtils; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.io.filters.BloomFilter; @@ -1940,6 +1941,15 @@ Object nextVector(Object previousVector, long batchSize) throws IOException { } } } + // Default any ALTER TABLE ... ADD COLUMNS to NULL. + for (int i = fields.length; i < result.length; i++) { + ColumnVector colVector = result[i]; + if (colVector != null) { + colVector.noNulls = false; + colVector.isNull[0] = true; + colVector.isRepeating = true; + } + } return result; } diff --git ql/src/test/queries/clientpositive/vector_partition_diff_num_cols2.q ql/src/test/queries/clientpositive/vector_partition_diff_num_cols2.q new file mode 100644 index 0000000..1deb51e --- /dev/null +++ ql/src/test/queries/clientpositive/vector_partition_diff_num_cols2.q @@ -0,0 +1,42 @@ + +set hive.fetch.task.conversion=minimal; + +CREATE EXTERNAL TABLE `passwd_null`( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +row format delimited fields terminated by ':' +stored as textfile; + +LOAD DATA LOCAL INPATH '../../data/files/passwd_null' OVERWRITE INTO TABLE passwd_null; + +create table passwd_orc ( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +stored as orc; + +describe extended passwd_orc; + +insert into passwd_orc select * from passwd_null; + +select max(shell) from passwd_orc; + +alter table passwd_orc add columns (test string); + +describe extended passwd_orc; + +SET hive.vectorized.execution.enabled=true; + +explain +select max(test) from passwd_orc; + +select max(test) from passwd_orc; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols2.q.out ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols2.q.out new file mode 100644 index 0000000..c4d10ed --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_partition_diff_num_cols2.q.out @@ -0,0 +1,188 @@ +PREHOOK: query: CREATE EXTERNAL TABLE `passwd_null`( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +row format delimited fields terminated by ':' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@passwd_null +POSTHOOK: query: CREATE EXTERNAL TABLE `passwd_null`( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +row format delimited fields terminated by ':' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@passwd_null +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/passwd_null' OVERWRITE INTO TABLE passwd_null +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@passwd_null +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/passwd_null' OVERWRITE INTO TABLE passwd_null +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@passwd_null +PREHOOK: query: create table passwd_orc ( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@passwd_orc +POSTHOOK: query: create table passwd_orc ( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@passwd_orc +PREHOOK: query: describe extended passwd_orc +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@passwd_orc +POSTHOOK: query: describe extended passwd_orc +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@passwd_orc +name string +password string +uid int +gid int +gecos string +homedir string +shell string + +#### A masked pattern was here #### +PREHOOK: query: insert into passwd_orc select * from passwd_null +PREHOOK: type: QUERY +PREHOOK: Input: default@passwd_null +PREHOOK: Output: default@passwd_orc +POSTHOOK: query: insert into passwd_orc select * from passwd_null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@passwd_null +POSTHOOK: Output: default@passwd_orc +POSTHOOK: Lineage: passwd_orc.gecos SIMPLE [(passwd_null)passwd_null.FieldSchema(name:gecos, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.gid SIMPLE [(passwd_null)passwd_null.FieldSchema(name:gid, type:int, comment:null), ] +POSTHOOK: Lineage: passwd_orc.homedir SIMPLE [(passwd_null)passwd_null.FieldSchema(name:homedir, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.name SIMPLE [(passwd_null)passwd_null.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.password SIMPLE [(passwd_null)passwd_null.FieldSchema(name:password, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.shell SIMPLE [(passwd_null)passwd_null.FieldSchema(name:shell, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.uid SIMPLE [(passwd_null)passwd_null.FieldSchema(name:uid, type:int, comment:null), ] +PREHOOK: query: select max(shell) from passwd_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@passwd_orc +#### A masked pattern was here #### +POSTHOOK: query: select max(shell) from passwd_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@passwd_orc +#### A masked pattern was here #### +NULL +PREHOOK: query: alter table passwd_orc add columns (test string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@passwd_orc +PREHOOK: Output: default@passwd_orc +POSTHOOK: query: alter table passwd_orc add columns (test string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@passwd_orc +POSTHOOK: Output: default@passwd_orc +PREHOOK: query: describe extended passwd_orc +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@passwd_orc +POSTHOOK: query: describe extended passwd_orc +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@passwd_orc +name string +password string +uid int +gid int +gecos string +homedir string +shell string +test string + +#### A masked pattern was here #### +PREHOOK: query: explain +select max(test) from passwd_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(test) from passwd_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: passwd_orc + Statistics: Num rows: 7 Data size: 746 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: test (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 746 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(test) from passwd_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@passwd_orc +#### A masked pattern was here #### +POSTHOOK: query: select max(test) from passwd_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@passwd_orc +#### A masked pattern was here #### +NULL diff --git ql/src/test/results/clientpositive/vector_partition_diff_num_cols2.q.out ql/src/test/results/clientpositive/vector_partition_diff_num_cols2.q.out new file mode 100644 index 0000000..859ed91 --- /dev/null +++ ql/src/test/results/clientpositive/vector_partition_diff_num_cols2.q.out @@ -0,0 +1,181 @@ +PREHOOK: query: CREATE EXTERNAL TABLE `passwd_null`( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +row format delimited fields terminated by ':' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@passwd_null +POSTHOOK: query: CREATE EXTERNAL TABLE `passwd_null`( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +row format delimited fields terminated by ':' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@passwd_null +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/passwd_null' OVERWRITE INTO TABLE passwd_null +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@passwd_null +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/passwd_null' OVERWRITE INTO TABLE passwd_null +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@passwd_null +PREHOOK: query: create table passwd_orc ( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@passwd_orc +POSTHOOK: query: create table passwd_orc ( + `name` string, + `password` string, + `uid` int, + `gid` int, + `gecos` string, + `homedir` string, + `shell` string) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@passwd_orc +PREHOOK: query: describe extended passwd_orc +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@passwd_orc +POSTHOOK: query: describe extended passwd_orc +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@passwd_orc +name string +password string +uid int +gid int +gecos string +homedir string +shell string + +#### A masked pattern was here #### +PREHOOK: query: insert into passwd_orc select * from passwd_null +PREHOOK: type: QUERY +PREHOOK: Input: default@passwd_null +PREHOOK: Output: default@passwd_orc +POSTHOOK: query: insert into passwd_orc select * from passwd_null +POSTHOOK: type: QUERY +POSTHOOK: Input: default@passwd_null +POSTHOOK: Output: default@passwd_orc +POSTHOOK: Lineage: passwd_orc.gecos SIMPLE [(passwd_null)passwd_null.FieldSchema(name:gecos, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.gid SIMPLE [(passwd_null)passwd_null.FieldSchema(name:gid, type:int, comment:null), ] +POSTHOOK: Lineage: passwd_orc.homedir SIMPLE [(passwd_null)passwd_null.FieldSchema(name:homedir, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.name SIMPLE [(passwd_null)passwd_null.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.password SIMPLE [(passwd_null)passwd_null.FieldSchema(name:password, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.shell SIMPLE [(passwd_null)passwd_null.FieldSchema(name:shell, type:string, comment:null), ] +POSTHOOK: Lineage: passwd_orc.uid SIMPLE [(passwd_null)passwd_null.FieldSchema(name:uid, type:int, comment:null), ] +PREHOOK: query: select max(shell) from passwd_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@passwd_orc +#### A masked pattern was here #### +POSTHOOK: query: select max(shell) from passwd_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@passwd_orc +#### A masked pattern was here #### +NULL +PREHOOK: query: alter table passwd_orc add columns (test string) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@passwd_orc +PREHOOK: Output: default@passwd_orc +POSTHOOK: query: alter table passwd_orc add columns (test string) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@passwd_orc +POSTHOOK: Output: default@passwd_orc +PREHOOK: query: describe extended passwd_orc +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@passwd_orc +POSTHOOK: query: describe extended passwd_orc +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@passwd_orc +name string +password string +uid int +gid int +gecos string +homedir string +shell string +test string + +#### A masked pattern was here #### +PREHOOK: query: explain +select max(test) from passwd_orc +PREHOOK: type: QUERY +POSTHOOK: query: explain +select max(test) from passwd_orc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: passwd_orc + Statistics: Num rows: 7 Data size: 746 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: test (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 746 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select max(test) from passwd_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@passwd_orc +#### A masked pattern was here #### +POSTHOOK: query: select max(test) from passwd_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@passwd_orc +#### A masked pattern was here #### +NULL