diff --git data/files/vectortab10korc data/files/vectortab10korc new file mode 100644 index 0000000..e28ee6d Binary files /dev/null and data/files/vectortab10korc differ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java index e6e9d04..ce20fe0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java @@ -24,12 +24,16 @@ import java.util.Map; import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -404,8 +408,7 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException { public void assignObjectValue(Object val, int destIndex) throws HiveException { if (val == null) { assignNull(destIndex); - } - else { + } else { Text bw = (Text) val; byte[] bytes = bw.getBytes(); assignBytes(bytes, 0, bw.getLength(), destIndex); @@ -413,6 +416,48 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException { } }.init(outputBatch, (BytesColumnVector) destCol); break; + case VARCHAR: + outVCA = new VectorBytesColumnAssign() { + @Override + public void assignObjectValue(Object val, int destIndex) throws HiveException { + if 
(val == null) { + assignNull(destIndex); + } else { + // The value may arrive as a HiveVarcharWritable or as a plain HiveVarchar. + HiveVarchar hiveVarchar; + if (val instanceof HiveVarcharWritable) { + hiveVarchar = ((HiveVarcharWritable) val).getHiveVarchar(); + } else { + hiveVarchar = (HiveVarchar) val; + } + // Encode through Text so the stored bytes are UTF-8, not the platform default charset. + Text text = new Text(hiveVarchar.getValue()); + assignBytes(text.getBytes(), 0, text.getLength(), destIndex); + } + } + }.init(outputBatch, (BytesColumnVector) destCol); + break; + case CHAR: + outVCA = new VectorBytesColumnAssign() { + @Override + public void assignObjectValue(Object val, int destIndex) throws HiveException { + if (val == null) { + assignNull(destIndex); + } else { + // The value may arrive as a HiveCharWritable or as a plain HiveChar. + HiveChar hiveChar; + if (val instanceof HiveCharWritable) { + hiveChar = ((HiveCharWritable) val).getHiveChar(); + } else { + hiveChar = (HiveChar) val; + } + // We store CHAR type stripped of pads; encode through Text so the bytes are UTF-8. + Text text = new Text(hiveChar.getStrippedValue()); + assignBytes(text.getBytes(), 0, text.getLength(), destIndex); + } + } + }.init(outputBatch, (BytesColumnVector) destCol); + break; default: throw new HiveException("Incompatible Bytes vector column and primitive category " + category); diff --git ql/src/test/queries/clientpositive/vector_char_4.q ql/src/test/queries/clientpositive/vector_char_4.q new file mode 100644 index 0000000..46ed844 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_char_4.q @@ -0,0 +1,32 @@ +SET hive.vectorized.execution.enabled=true; + +drop table if exists vectortab10korc; +create table vectortab10korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc; + +select count(*) from vectortab10korc; + +drop table if exists char_lazy_binary_columnar; +create table char_lazy_binary_columnar(vt char(10), vsi char(10), vi char(20), vb char(30), vf char(20),vd char(20),vs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile; + +explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc; + +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, 
d, s from vectortab10korc; + +select count(*) as cnt from char_lazy_binary_columnar group by vs order by cnt asc; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_varchar_4.q ql/src/test/queries/clientpositive/vector_varchar_4.q new file mode 100644 index 0000000..2103dfa --- /dev/null +++ ql/src/test/queries/clientpositive/vector_varchar_4.q @@ -0,0 +1,32 @@ +SET hive.vectorized.execution.enabled=true; + +drop table if exists vectortab10korc; +create table vectortab10korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc; + +select count(*) from vectortab10korc; + +drop table if exists varchar_lazy_binary_columnar; +create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile; + +explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc; + +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc; + +select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vector_char_4.q.out ql/src/test/results/clientpositive/tez/vector_char_4.q.out new file mode 100644 index 0000000..ea169f0 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_char_4.q.out @@ -0,0 +1,172 @@ +PREHOOK: query: drop table if exists vectortab10korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab10korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vectortab10korc( + t tinyint, + si smallint, + i 
int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab10korc +POSTHOOK: query: create table vectortab10korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab10korc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab10korc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab10korc +PREHOOK: query: select count(*) from vectortab10korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab10korc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from vectortab10korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab10korc +#### A masked pattern was here #### +10003 +PREHOOK: query: drop table if exists char_lazy_binary_columnar +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_lazy_binary_columnar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_lazy_binary_columnar(vt char(10), vsi char(10), vi char(20), vb char(30), vf char(20),vd char(20),vs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_lazy_binary_columnar +POSTHOOK: query: create table char_lazy_binary_columnar(vt char(10), vsi char(10), vi char(20), vb 
char(30), vf char(20),vd char(20),vs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_lazy_binary_columnar +PREHOOK: query: explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab10korc + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + Execution mode: vectorized + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab10korc +PREHOOK: Output: default@char_lazy_binary_columnar +POSTHOOK: query: insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab10korc +POSTHOOK: Output: default@char_lazy_binary_columnar +POSTHOOK: Lineage: char_lazy_binary_columnar.vb EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vd EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vf EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vs EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vsi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vt EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:t, type:tinyint, comment:null), ] +PREHOOK: query: select count(*) as cnt from char_lazy_binary_columnar group by vs order by cnt asc +PREHOOK: type: QUERY +PREHOOK: Input: default@char_lazy_binary_columnar +#### A masked pattern was here #### +POSTHOOK: query: select count(*) as cnt from 
char_lazy_binary_columnar group by vs order by cnt asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_lazy_binary_columnar +#### A masked pattern was here #### +3 +321 +325 +328 +339 +350 +352 +352 +353 +355 +355 +361 +362 +362 +366 +367 +371 +372 +372 +379 +379 +381 +382 +395 +404 +406 +410 +501 diff --git ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out new file mode 100644 index 0000000..39512cb --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_varchar_4.q.out @@ -0,0 +1,171 @@ +PREHOOK: query: drop table if exists vectortab10korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab10korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vectortab10korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab10korc +POSTHOOK: query: create table vectortab10korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab10korc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab10korc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab10korc +PREHOOK: query: select count(*) from vectortab10korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab10korc +#### A masked 
pattern was here #### +POSTHOOK: query: select count(*) from vectortab10korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab10korc +#### A masked pattern was here #### +10003 +PREHOOK: query: drop table if exists varchar_lazy_binary_columnar +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists varchar_lazy_binary_columnar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_lazy_binary_columnar +POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_lazy_binary_columnar +PREHOOK: query: explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab10korc + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) 
(type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.varchar_lazy_binary_columnar + Execution mode: vectorized + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.varchar_lazy_binary_columnar + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab10korc +PREHOOK: Output: default@varchar_lazy_binary_columnar +POSTHOOK: query: insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab10korc +POSTHOOK: Output: default@varchar_lazy_binary_columnar +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vb EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vd EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vf 
EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vs EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vsi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vt EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:t, type:tinyint, comment:null), ] +PREHOOK: query: select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_lazy_binary_columnar +#### A masked pattern was here #### +POSTHOOK: query: select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_lazy_binary_columnar +#### A masked pattern was here #### +321 +325 +328 +339 +350 +352 +352 +353 +355 +355 +361 +362 +362 +366 +367 +371 +372 +372 +379 +379 +381 +382 +395 +404 +406 +410 +504 diff --git ql/src/test/results/clientpositive/vector_char_4.q.out ql/src/test/results/clientpositive/vector_char_4.q.out new file mode 100644 index 0000000..9c7c797 --- /dev/null +++ ql/src/test/results/clientpositive/vector_char_4.q.out @@ -0,0 +1,199 @@ +PREHOOK: query: drop table if exists vectortab10korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab10korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vectortab10korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@vectortab10korc +POSTHOOK: query: create table vectortab10korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab10korc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@vectortab10korc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab10korc +PREHOOK: query: select count(*) from vectortab10korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab10korc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from vectortab10korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab10korc +#### A masked pattern was here #### +10003 +PREHOOK: query: drop table if exists char_lazy_binary_columnar +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_lazy_binary_columnar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_lazy_binary_columnar(vt char(10), vsi char(10), vi char(20), vb char(30), vf char(20),vd char(20),vs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_lazy_binary_columnar +POSTHOOK: query: create table char_lazy_binary_columnar(vt char(10), vsi char(10), vi char(20), vb char(30), vf char(20),vd char(20),vs char(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@char_lazy_binary_columnar +PREHOOK: query: explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab10korc + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + Execution mode: vectorized + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.char_lazy_binary_columnar + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab10korc +PREHOOK: Output: default@char_lazy_binary_columnar +POSTHOOK: query: insert overwrite table char_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab10korc +POSTHOOK: Output: default@char_lazy_binary_columnar +POSTHOOK: Lineage: char_lazy_binary_columnar.vb EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vd EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vf EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vs EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: char_lazy_binary_columnar.vsi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: 
char_lazy_binary_columnar.vt EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:t, type:tinyint, comment:null), ] +PREHOOK: query: select count(*) as cnt from char_lazy_binary_columnar group by vs order by cnt asc +PREHOOK: type: QUERY +PREHOOK: Input: default@char_lazy_binary_columnar +#### A masked pattern was here #### +POSTHOOK: query: select count(*) as cnt from char_lazy_binary_columnar group by vs order by cnt asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_lazy_binary_columnar +#### A masked pattern was here #### +3 +321 +325 +328 +339 +350 +352 +352 +353 +355 +355 +361 +362 +362 +366 +367 +371 +372 +372 +379 +379 +381 +382 +395 +404 +406 +410 +501 diff --git ql/src/test/results/clientpositive/vector_varchar_4.q.out ql/src/test/results/clientpositive/vector_varchar_4.q.out new file mode 100644 index 0000000..b55a03f --- /dev/null +++ ql/src/test/results/clientpositive/vector_varchar_4.q.out @@ -0,0 +1,198 @@ +PREHOOK: query: drop table if exists vectortab10korc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists vectortab10korc +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table vectortab10korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@vectortab10korc +POSTHOOK: query: create table vectortab10korc( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) + stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@vectortab10korc +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@vectortab10korc +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/vectortab10korc' OVERWRITE INTO TABLE vectortab10korc +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@vectortab10korc +PREHOOK: query: select count(*) from vectortab10korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab10korc +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from vectortab10korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab10korc +#### A masked pattern was here #### +10003 +PREHOOK: query: drop table if exists varchar_lazy_binary_columnar +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists varchar_lazy_binary_columnar +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@varchar_lazy_binary_columnar +POSTHOOK: query: create table varchar_lazy_binary_columnar(vt varchar(10), vsi varchar(10), vi varchar(20), vb varchar(30), vf varchar(20),vd varchar(20),vs varchar(50)) row format serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@varchar_lazy_binary_columnar +PREHOOK: query: explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, 
Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: vectortab10korc + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3524 Data size: 465293 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.varchar_lazy_binary_columnar + Execution mode: vectorized + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe + name: default.varchar_lazy_binary_columnar + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-5 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: 
block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab10korc +PREHOOK: Output: default@varchar_lazy_binary_columnar +POSTHOOK: query: insert overwrite table varchar_lazy_binary_columnar select t, si, i, b, f, d, s from vectortab10korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab10korc +POSTHOOK: Output: default@varchar_lazy_binary_columnar +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vb EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:b, type:bigint, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vd EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:d, type:double, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vf EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:f, type:float, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vs EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:s, type:string, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vsi EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:si, type:smallint, comment:null), ] +POSTHOOK: Lineage: varchar_lazy_binary_columnar.vt EXPRESSION [(vectortab10korc)vectortab10korc.FieldSchema(name:t, type:tinyint, comment:null), ] +PREHOOK: query: select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt asc +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_lazy_binary_columnar +#### A masked pattern was here #### +POSTHOOK: query: select count(*) as cnt from varchar_lazy_binary_columnar group by vs order by cnt 
asc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_lazy_binary_columnar +#### A masked pattern was here #### +321 +325 +328 +339 +350 +352 +352 +353 +355 +355 +361 +362 +362 +366 +367 +371 +372 +372 +379 +379 +381 +382 +395 +404 +406 +410 +504