diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 39ee9d3..643eee6 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -412,6 +412,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ tez_smb_main.q,\ tez_smb_1.q,\ tez_smb_empty.q,\ + vector_join_part_col_char.q,\ vectorized_dynamic_partition_pruning.q,\ tez_multi_union.q,\ tez_join.q,\ @@ -454,6 +455,7 @@ minillap.query.files=bucket_map_join_tez1.q,\ tez_union_group_by.q,\ tez_smb_main.q,\ tez_smb_1.q,\ + vector_join_part_col_char.q,\ vectorized_dynamic_partition_pruning.q,\ tez_multi_union.q,\ tez_join.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index a904a50..0ec91b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -25,6 +25,8 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -159,14 +161,17 @@ public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, PartitionDes String key = vrbCtx.rowColumnNames[vrbCtx.dataColumnCount + i]; // Create a Standard java object Inspector + TypeInfo partColTypeInfo = vrbCtx.rowColumnTypeInfos[vrbCtx.dataColumnCount + i]; ObjectInspector objectInspector = - TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo( - vrbCtx.rowColumnTypeInfos[vrbCtx.dataColumnCount + i]); + TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(partColTypeInfo); objectValue = ObjectInspectorConverters. getConverter(PrimitiveObjectInspectorFactory. javaStringObjectInspector, objectInspector). convert(partSpec.get(key)); + if (partColTypeInfo instanceof CharTypeInfo) { + objectValue = ((HiveChar) objectValue).getStrippedValue(); + } } partitionValues[i] = objectValue; } diff --git a/ql/src/test/queries/clientpositive/vector_join_part_col_char.q b/ql/src/test/queries/clientpositive/vector_join_part_col_char.q new file mode 100644 index 0000000..45a9165 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_join_part_col_char.q @@ -0,0 +1,27 @@ +set hive.mapred.mode=nonstrict; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.vectorized.execution.enabled=true; + +drop table if exists char_part_tbl1 ; +drop table if exists char_part_tbl2; + +create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true'); +insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50); + +create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc; +create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc; + +insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5; +insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5; +insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5; +insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3; + +show partitions char_tbl1; +show partitions char_tbl2; + +explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa); +select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa); + +set hive.vectorized.execution.enabled=false; +select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa); diff --git a/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out new file mode 100644 index 0000000..d72ebe1 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_join_part_col_char.q.out @@ -0,0 +1,224 @@ +PREHOOK: query: drop table if exists char_part_tbl1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_part_tbl2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab +POSTHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab +PREHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@studenttab +POSTHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@studenttab +POSTHOOK: Lineage: studenttab.age EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: studenttab.gpa EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: studenttab.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl1 +POSTHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl1 +PREHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl2 +POSTHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl2 +PREHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: show partitions char_tbl1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl1 +POSTHOOK: query: show partitions char_tbl1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl1 +gpa=2.5 +gpa=3.5 +PREHOOK: query: show partitions char_tbl2 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl2 +POSTHOOK: query: show partitions char_tbl2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl2 +gpa=3 +gpa=3.5 +PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: c1 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string), age (type: int), gpa (type: char(50)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: char(50)) + sort order: + + Map-reduce partition columns: _col2 (type: char(50)) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) + Select Operator + expressions: _col2 (type: char(50)) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: char(50)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target Input: c2 + Partition key expr: gpa + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Target column: gpa + Target Vertex: Map 3 + Execution mode: vectorized, llap + Map 3 + Map Operator Tree: + TableScan + alias: c2 + Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string), age (type: int), gpa (type: char(5)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: char(50)) + sort order: + + Map-reduce partition columns: _col2 (type: char(50)) + Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: char(5)) + Execution mode: vectorized, llap + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: char(50)) + 1 _col2 (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5 +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5 diff --git a/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out new file mode 100644 index 0000000..f3d5931 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out @@ -0,0 +1,197 @@ +PREHOOK: query: drop table if exists char_part_tbl1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_part_tbl2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab +POSTHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab +PREHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@studenttab +POSTHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@studenttab +POSTHOOK: Lineage: studenttab.age EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: studenttab.gpa EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: studenttab.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl1 +POSTHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl1 +PREHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl2 +POSTHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl2 +PREHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: show partitions char_tbl1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl1 +POSTHOOK: query: show partitions char_tbl1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl1 +gpa=2.5 +gpa=3.5 +PREHOOK: query: show partitions char_tbl2 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl2 +POSTHOOK: query: show partitions char_tbl2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl2 +gpa=3 +gpa=3.5 +PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 2 + File Output Operator [FS_10] + compressed:false + Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + Merge Join Operator [MERGEJOIN_21] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"0":"_col2 (type: char(50))","1":"_col2 (type: char(50))"} + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [SIMPLE_EDGE] vectorized + | Reduce Output Operator [RS_23] + | key expressions:_col2 (type: char(50)) + | Map-reduce partition columns:_col2 (type: char(50)) + | sort order:+ + | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: int) + | Select Operator [OP_22] + | outputColumnNames:["_col0","_col1","_col2"] + | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:c1 + | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + | Dynamic Partitioning Event Operator [EVENT_20] + | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [OP_25] + | keys:_col0 (type: char(50)) + | outputColumnNames:["_col0"] + | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + | Select Operator [OP_24] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + | Please refer to the previous Select Operator [OP_22] + |<-Map 3 [SIMPLE_EDGE] vectorized + Reduce Output Operator [RS_27] + key expressions:_col2 (type: char(50)) + Map-reduce partition columns:_col2 (type: char(50)) + sort order:+ + Statistics:Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: string), _col1 (type: int), _col2 (type: char(5)) + Select Operator [OP_26] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:c2 + Statistics:Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5 +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5 diff --git a/ql/src/test/results/clientpositive/vector_join_part_col_char.q.out b/ql/src/test/results/clientpositive/vector_join_part_col_char.q.out new file mode 100644 index 0000000..b6631f7 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_join_part_col_char.q.out @@ -0,0 +1,198 @@ +PREHOOK: query: drop table if exists char_part_tbl1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_part_tbl2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_part_tbl2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@studenttab +POSTHOOK: query: create table studenttab(name string, age int, gpa double) clustered by (age) into 2 buckets stored as orc tblproperties('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@studenttab +PREHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@studenttab +POSTHOOK: query: insert into table studenttab values ('calvin garcia',56,2.50), ('oscar miller',66,3.00), ('(yuri xylophone',30,2.74),('alice underhill',46,3.50) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@studenttab +POSTHOOK: Lineage: studenttab.age EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: studenttab.gpa EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: studenttab.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl1 +POSTHOOK: query: create table char_tbl1(name string, age int) partitioned by(gpa char(50)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl1 +PREHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_tbl2 +POSTHOOK: query: create table char_tbl2(name string, age int) partitioned by(gpa char(5)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_tbl2 +PREHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=3.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: query: insert into table char_tbl1 partition(gpa='2.5') select name, age from studenttab where gpa = 2.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl1@gpa=2.5 +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl1 PARTITION(gpa=2.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: query: insert into table char_tbl2 partition(gpa='3.5') select name, age from studenttab where gpa = 3.5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3.5 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3.5 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab +PREHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: query: insert into table char_tbl2 partition(gpa='3') select name, age from studenttab where gpa = 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab +POSTHOOK: Output: default@char_tbl2@gpa=3 +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).age SIMPLE [(studenttab)studenttab.FieldSchema(name:age, type:int, comment:null), ] +POSTHOOK: Lineage: char_tbl2 PARTITION(gpa=3 ).name SIMPLE [(studenttab)studenttab.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: show partitions char_tbl1 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl1 +POSTHOOK: query: show partitions char_tbl1 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl1 +gpa=2.5 +gpa=3.5 +PREHOOK: query: show partitions char_tbl2 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@char_tbl2 +POSTHOOK: query: show partitions char_tbl2 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@char_tbl2 +gpa=3 +gpa=3.5 +PREHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +POSTHOOK: query: explain select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: c1 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string), age (type: int), gpa (type: char(50)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: char(50)) + sort order: + + Map-reduce partition columns: _col2 (type: char(50)) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) + TableScan + alias: c2 + Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: name (type: string), age (type: int), gpa (type: char(5)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: char(50)) + sort order: + + Map-reduce partition columns: _col2 (type: char(50)) + Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: char(5)) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: char(50)) + 1 _col2 (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5 +PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +PREHOOK: type: QUERY +PREHOOK: Input: default@char_tbl1 +PREHOOK: Input: default@char_tbl1@gpa=2.5 +PREHOOK: Input: default@char_tbl1@gpa=3.5 +PREHOOK: Input: default@char_tbl2 +PREHOOK: Input: default@char_tbl2@gpa=3 +PREHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +POSTHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_tbl1 +POSTHOOK: Input: default@char_tbl1@gpa=2.5 +POSTHOOK: Input: default@char_tbl1@gpa=3.5 +POSTHOOK: Input: default@char_tbl2 +POSTHOOK: Input: default@char_tbl2@gpa=3 +POSTHOOK: Input: default@char_tbl2@gpa=3.5 +#### A masked pattern was here #### +alice underhill 46 3.5 alice underhill 46 3.5