diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 2b5c882..f5d5155 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -157,6 +157,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ update_two_cols.q,\ vector_between_in.q,\ vector_cast_constant.q,\ + vector_char_4.q,\ vector_char_simple.q,\ vector_data_types.q,\ vector_decimal_aggregate.q,\ @@ -168,6 +169,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_non_string_partition.q,\ vector_orderby_5.q,\ vector_string_concat.q,\ + vector_varchar_4.q,\ vector_varchar_simple.q,\ vectorization_0.q,\ vectorization_1.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java index e6e9d04..14ef79e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java @@ -24,7 +24,9 @@ import java.util.Map; import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; @@ -404,8 +406,7 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException { public void assignObjectValue(Object val, int destIndex) throws HiveException { if (val == null) { assignNull(destIndex); - } - else { + } else { Text bw = (Text) val; byte[] bytes = bw.getBytes(); assignBytes(bytes, 0, bw.getLength(), destIndex); @@ -413,6 +414,35 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException { } }.init(outputBatch, (BytesColumnVector) destCol); break; + case VARCHAR: + outVCA = new VectorBytesColumnAssign() { + @Override + public void assignObjectValue(Object val, int destIndex) throws HiveException { + if (val == null) { + assignNull(destIndex); + } else { + HiveVarchar hiveVarchar = (HiveVarchar) val; + byte[] bytes = hiveVarchar.getValue().getBytes(); + assignBytes(bytes, 0, bytes.length, destIndex); + } + } + }.init(outputBatch, (BytesColumnVector) destCol); + break; + case CHAR: + outVCA = new VectorBytesColumnAssign() { + @Override + public void assignObjectValue(Object val, int destIndex) throws HiveException { + if (val == null) { + assignNull(destIndex); + } else { + // We store CHAR type stripped of pads. 
+ HiveChar hiveChar = (HiveChar) val; + byte[] bytes = hiveChar.getStrippedValue().getBytes(); + assignBytes(bytes, 0, bytes.length, destIndex); + } + } + }.init(outputBatch, (BytesColumnVector) destCol); + break; default: throw new HiveException("Incompatible Bytes vector column and primitive category " + category); diff --git ql/src/test/queries/clientpositive/vector_char_4.q ql/src/test/queries/clientpositive/vector_char_4.q new file mode 100644 index 0000000..c824456 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_char_4.q @@ -0,0 +1,51 @@ +SET hive.vectorized.execution.enabled=true; + +drop table if exists vectortab2k; +drop table if exists vectortab2korc; + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k; + +create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC; + +INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; + +drop table if exists char_lazy_binary_columnar; +create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile; + +explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; + +-- insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; + +-- select count(*) as cnt from char_lazy_binary_columnar group by cs order by cnt asc; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_varchar_4.q ql/src/test/queries/clientpositive/vector_varchar_4.q new file mode 100644 index 0000000..c1e9c67 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_varchar_4.q @@ -0,0 +1,51 @@ +SET hive.vectorized.execution.enabled=true; + +drop table if exists vectortab2k; +drop table if exists vectortab2korc; + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k; + +create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC; + +INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; + +drop table if exists varchar_lazy_binary_columnar; +create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile; + +explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; + +-- insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc; + +-- select count(*) as cnt from 
varchar_lazy_binary_columnar group by vs order by cnt asc; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vector_char_4.q.out ql/src/test/results/clientpositive/tez/vector_char_4.q.out new file mode 100644 index 0000000..8daf377 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_char_4.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: drop table if exists vectortab2k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2k +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists vectortab2korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2korc +PREHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: default@vectortab2korc +POSTHOOK: Lineage: vectortab2korc.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.d SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dc SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dc, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dt SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.f SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s2, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] +PREHOOK: query: drop table if exists char_lazy_binary_columnar +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_lazy_binary_columnar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_lazy_binary_columnar +POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_lazy_binary_columnar +PREHOOK: query: explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + Execution mode: vectorized + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + + Stage: Stage-3 + Stats-Aggr Operator + diff --git ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out new file mode 100644 index 0000000..02fa042 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out @@ -0,0 +1,175 @@ +PREHOOK: query: drop table if exists vectortab2k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2k +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists vectortab2korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2korc +PREHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: default@vectortab2korc +POSTHOOK: Lineage: vectortab2korc.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.d SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dc SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dc, 
type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dt SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.f SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s2, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] +PREHOOK: query: drop table if exists varchar_lazy_binary_columnar +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists varchar_lazy_binary_columnar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_lazy_binary_columnar +POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_lazy_binary_columnar +PREHOOK: query: explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.varchar_lazy_binary_columnar + Execution mode: vectorized + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.varchar_lazy_binary_columnar + + Stage: Stage-3 + Stats-Aggr Operator + diff --git ql/src/test/results/clientpositive/vector_char_4.q.out ql/src/test/results/clientpositive/vector_char_4.q.out new file mode 100644 index 0000000..58988bf --- /dev/null +++ ql/src/test/results/clientpositive/vector_char_4.q.out @@ -0,0 +1,202 @@ +PREHOOK: query: drop table if exists vectortab2k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2k +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists vectortab2korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2korc +PREHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: default@vectortab2korc +POSTHOOK: Lineage: vectortab2korc.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.bo SIMPLE 
[(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.d SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dc SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dc, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dt SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.f SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s2, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] +PREHOOK: query: drop table if exists char_lazy_binary_columnar +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_lazy_binary_columnar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_lazy_binary_columnar +POSTHOOK: query: create table char_lazy_binary_columnar(ct char(10), csi char(10), ci char(20), cb char(30), cf char(20), cd char(20), cs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_lazy_binary_columnar +PREHOOK: query: explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE 
+ File Output Operator + compressed: false + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + Execution mode: vectorized + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + diff --git ql/src/test/results/clientpositive/vector_varchar_4.q.out ql/src/test/results/clientpositive/vector_varchar_4.q.out new file mode 100644 index 0000000..f7c9cd0 --- /dev/null +++ ql/src/test/results/clientpositive/vector_varchar_4.q.out @@ -0,0 +1,202 @@ +PREHOOK: query: drop table if exists vectortab2k +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2k +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists vectortab2korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab2korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab2k +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab2k +PREHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: create table vectortab2korc( + t tinyint, + si smallint, + i int, + b bigint, + 
f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab2korc +PREHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2k +PREHOOK: Output: default@vectortab2korc +POSTHOOK: query: INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2k +POSTHOOK: Output: default@vectortab2korc +POSTHOOK: Lineage: vectortab2korc.b SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.bo SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:bo, type:boolean, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.d SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dc SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dc, type:decimal(38,18), comment:null), ] +POSTHOOK: Lineage: vectortab2korc.dt SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:dt, type:date, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.f SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.i SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.s2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:s2, type:string, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] +PREHOOK: query: drop table if exists varchar_lazy_binary_columnar +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists varchar_lazy_binary_columnar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_lazy_binary_columnar +POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_lazy_binary_columnar +PREHOOK: query: explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab2korc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of 
Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.varchar_lazy_binary_columnar + Execution mode: vectorized + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.varchar_lazy_binary_columnar + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### +
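
The new CHAR branch added to VectorColumnAssignFactory above stores CHAR values with their trailing pad characters stripped (via HiveChar.getStrippedValue()), while the VARCHAR branch copies the value unchanged (via HiveVarchar.getValue()) before handing the bytes to assignBytes(bytes, 0, bytes.length, destIndex). The standalone sketch below is not part of the patch; the class name and main method are purely illustrative. It only exercises the two accessors the hunk relies on, to show why the CHAR case writes fewer bytes than the declared length into the BytesColumnVector.

    import org.apache.hadoop.hive.common.type.HiveChar;
    import org.apache.hadoop.hive.common.type.HiveVarchar;

    public class CharPadStrippingSketch {
        public static void main(String[] args) {
            // CHAR pads to its declared length, but the vectorized assign path
            // copies only the stripped value into the BytesColumnVector.
            HiveChar c = new HiveChar("ab", 10);
            byte[] charBytes = c.getStrippedValue().getBytes();   // 2 bytes, not 10

            // VARCHAR carries no padding, so its value is copied as-is.
            HiveVarchar v = new HiveVarchar("ab", 10);
            byte[] varcharBytes = v.getValue().getBytes();        // 2 bytes

            System.out.println(charBytes.length + " " + varcharBytes.length); // 2 2
        }
    }

In both branches the length passed to assignBytes is bytes.length of the extracted value, so the CHAR column ends up holding the unpadded characters only, which is the behaviour the new vector_char_4.q / vector_varchar_4.q tests exercise through the LazyBinaryColumnarSerDe insert paths above.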