diff --git a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
index 1aee9b3..9d5432f 100644
--- a/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
+++ b/ql/src/gen/vectorization/ExpressionTemplates/FilterColumnBetweenDynamicValue.txt
@@ -27,7 +27,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 
 public class extends FilterColumnBetween {
 
diff --git a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
index 446407d..be8e4af 100644
--- a/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
+++ b/ql/src/test/queries/clientpositive/vectorized_dynamic_semijoin_reduction2.q
@@ -18,7 +18,10 @@ create table dsrv2_big stored as orc as
   cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
   cast(L_PARTKEY as double) as partkey_double,
   cast(l_shipdate as date) as shipdate_date,
-  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts,
+  cast(l_shipdate as string) as shipdate_string,
+  cast(l_shipdate as char(10)) as shipdate_char,
+  cast(l_shipdate as varchar(10)) as shipdate_varchar
 from lineitem;
 create table dsrv2_small stored as orc as select * from dsrv2_big limit 20;
 analyze table dsrv2_big compute statistics;
@@ -46,5 +49,17 @@ select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_date = b.ship
 EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts);
 select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_ts = b.shipdate_ts);
 
+-- single key (string)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string);
+
+-- single key (char)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char);
+
+-- single key (varchar)
+EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar);
+select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar);
+
 drop table dsrv2_big;
 drop table dsrv2_small;
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
index 27d8152..062fef6 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
@@ -4,7 +4,10 @@ PREHOOK: query: create table dsrv2_big stored as orc as
   cast(L_PARTKEY as decimal(10,1)) as partkey_decimal,
   cast(L_PARTKEY as double) as partkey_double,
   cast(l_shipdate as date) as shipdate_date,
-  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts
+  cast(cast(l_shipdate as date) as timestamp) as shipdate_ts,
+  cast(l_shipdate as string) as shipdate_string,
+  cast(l_shipdate as
char(10)) as shipdate_char, + cast(l_shipdate as varchar(10)) as shipdate_varchar from lineitem PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@lineitem @@ -16,7 +19,10 @@ POSTHOOK: query: create table dsrv2_big stored as orc as cast(L_PARTKEY as decimal(10,1)) as partkey_decimal, cast(L_PARTKEY as double) as partkey_double, cast(l_shipdate as date) as shipdate_date, - cast(cast(l_shipdate as date) as timestamp) as shipdate_ts + cast(cast(l_shipdate as date) as timestamp) as shipdate_ts, + cast(l_shipdate as string) as shipdate_string, + cast(l_shipdate as char(10)) as shipdate_char, + cast(l_shipdate as varchar(10)) as shipdate_varchar from lineitem POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@lineitem @@ -25,8 +31,11 @@ POSTHOOK: Output: default@dsrv2_big POSTHOOK: Lineage: dsrv2_big.partkey_bigint EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ] POSTHOOK: Lineage: dsrv2_big.partkey_decimal EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ] POSTHOOK: Lineage: dsrv2_big.partkey_double EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: dsrv2_big.shipdate_char EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] POSTHOOK: Lineage: dsrv2_big.shipdate_date EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] +POSTHOOK: Lineage: dsrv2_big.shipdate_string SIMPLE [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] POSTHOOK: Lineage: dsrv2_big.shipdate_ts EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] +POSTHOOK: Lineage: dsrv2_big.shipdate_varchar EXPRESSION [(lineitem)lineitem.FieldSchema(name:l_shipdate, type:string, comment:null), ] PREHOOK: query: create table dsrv2_small stored as orc as select * from dsrv2_big limit 20 PREHOOK: type: CREATETABLE_AS_SELECT PREHOOK: Input: default@dsrv2_big @@ -40,8 +49,11 @@ POSTHOOK: Output: default@dsrv2_small POSTHOOK: Lineage: dsrv2_small.partkey_bigint SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_bigint, type:bigint, comment:null), ] POSTHOOK: Lineage: dsrv2_small.partkey_decimal SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_decimal, type:decimal(10,1), comment:null), ] POSTHOOK: Lineage: dsrv2_small.partkey_double SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:partkey_double, type:double, comment:null), ] +POSTHOOK: Lineage: dsrv2_small.shipdate_char SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_char, type:char(10), comment:null), ] POSTHOOK: Lineage: dsrv2_small.shipdate_date SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_date, type:date, comment:null), ] +POSTHOOK: Lineage: dsrv2_small.shipdate_string SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_string, type:string, comment:null), ] POSTHOOK: Lineage: dsrv2_small.shipdate_ts SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_ts, type:timestamp, comment:null), ] +POSTHOOK: Lineage: dsrv2_small.shipdate_varchar SIMPLE [(dsrv2_big)dsrv2_big.FieldSchema(name:shipdate_varchar, type:varchar(10), comment:null), ] PREHOOK: query: analyze table dsrv2_big compute statistics PREHOOK: type: QUERY PREHOOK: Input: default@dsrv2_big @@ -247,7 +259,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: decimal(10,1)) Statistics: Num rows: 100 Data size: 11200 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: 
vectorized, llap LLAP IO: all inputs Map 4 Map Operator Tree: @@ -655,7 +667,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: timestamp) Statistics: Num rows: 100 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map 4 Map Operator Tree: @@ -754,6 +766,414 @@ POSTHOOK: Input: default@dsrv2_big POSTHOOK: Input: default@dsrv2_small #### A masked pattern was here #### 23 +PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (shipdate_string is not null and shipdate_string BETWEEN DynamicValue(RS_7_b_shipdate_string_min) AND DynamicValue(RS_7_b_shipdate_string_max) and in_bloom_filter(shipdate_string, DynamicValue(RS_7_b_shipdate_string_bloom_filter))) (type: boolean) + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (shipdate_string is not null and shipdate_string BETWEEN DynamicValue(RS_7_b_shipdate_string_min) AND DynamicValue(RS_7_b_shipdate_string_max) and in_bloom_filter(shipdate_string, DynamicValue(RS_7_b_shipdate_string_bloom_filter))) (type: boolean) + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: shipdate_string (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: shipdate_string is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: shipdate_string is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: shipdate_string (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string) +PREHOOK: type: QUERY +PREHOOK: Input: default@dsrv2_big +PREHOOK: Input: default@dsrv2_small +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_string = b.shipdate_string) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dsrv2_big +POSTHOOK: Input: default@dsrv2_small +#### A masked pattern was here #### +23 +PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (shipdate_char is not null and shipdate_char BETWEEN DynamicValue(RS_7_b_shipdate_char_min) AND DynamicValue(RS_7_b_shipdate_char_max) and in_bloom_filter(shipdate_char, 
DynamicValue(RS_7_b_shipdate_char_bloom_filter))) (type: boolean) + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (shipdate_char is not null and shipdate_char BETWEEN DynamicValue(RS_7_b_shipdate_char_min) AND DynamicValue(RS_7_b_shipdate_char_max) and in_bloom_filter(shipdate_char, DynamicValue(RS_7_b_shipdate_char_bloom_filter))) (type: boolean) + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: shipdate_char (type: char(10)) + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(10)) + sort order: + + Map-reduce partition columns: _col0 (type: char(10)) + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: shipdate_char is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: shipdate_char is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: shipdate_char (type: char(10)) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(10)) + sort order: + + Map-reduce partition columns: _col0 (type: char(10)) + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: char(10)) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: char(10)) + 1 _col0 (type: char(10)) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char) +PREHOOK: type: QUERY +PREHOOK: Input: default@dsrv2_big +PREHOOK: Input: default@dsrv2_small +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_char = b.shipdate_char) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dsrv2_big +POSTHOOK: Input: default@dsrv2_small +#### A masked pattern was here #### +23 +PREHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + filterExpr: (shipdate_varchar is not null and shipdate_varchar BETWEEN DynamicValue(RS_7_b_shipdate_varchar_min) AND DynamicValue(RS_7_b_shipdate_varchar_max) and in_bloom_filter(shipdate_varchar, DynamicValue(RS_7_b_shipdate_varchar_bloom_filter))) (type: boolean) + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (shipdate_varchar is not null and shipdate_varchar BETWEEN DynamicValue(RS_7_b_shipdate_varchar_min) AND DynamicValue(RS_7_b_shipdate_varchar_max) and in_bloom_filter(shipdate_varchar, DynamicValue(RS_7_b_shipdate_varchar_bloom_filter))) (type: boolean) + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: shipdate_varchar (type: varchar(10)) + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(10)) + Statistics: Num rows: 100 Data size: 9400 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: shipdate_varchar is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: shipdate_varchar is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: shipdate_varchar (type: varchar(10)) + 
outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(10)) + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: varchar(10)) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=15) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: varchar(10)) + 1 _col0 (type: varchar(10)) + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=15) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar) +PREHOOK: type: QUERY +PREHOOK: Input: default@dsrv2_big +PREHOOK: Input: default@dsrv2_small +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from dsrv2_big a join dsrv2_small b on (a.shipdate_varchar = b.shipdate_varchar) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dsrv2_big +POSTHOOK: Input: default@dsrv2_small +#### A masked pattern was here #### +23 PREHOOK: query: drop table dsrv2_big PREHOOK: type: DROPTABLE PREHOOK: Input: default@dsrv2_big diff --git a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java 
b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
index 55cfb7b..926321e 100644
--- a/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
+++ b/vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
@@ -1418,7 +1418,7 @@ private void generateFilterColumnBetweenDynamicValue(String[] tdesc) throws Exce
       getValueMethod = "";
       conversionMethod = "";
     } else if (operandType.equals("decimal")) {
-      defaultValue = "null";
+      defaultValue = "HiveDecimal.ZERO";
       vectorType = "HiveDecimal";
       getPrimitiveMethod = "getHiveDecimal";
       getValueMethod = "";
@@ -1430,13 +1430,13 @@ private void generateFilterColumnBetweenDynamicValue(String[] tdesc) throws Exce
       getValueMethod = ".getBytes()";
       conversionMethod = "";
     } else if (operandType.equals("char")) {
-      defaultValue = "null";
+      defaultValue = "new HiveChar(\"\", 1)";
       vectorType = "byte[]";
       getPrimitiveMethod = "getHiveChar";
       getValueMethod = ".getStrippedValue().getBytes()";  // Does vectorization use stripped char values?
       conversionMethod = "";
     } else if (operandType.equals("varchar")) {
-      defaultValue = "null";
+      defaultValue = "new HiveVarchar(\"\", 1)";
       vectorType = "byte[]";
       getPrimitiveMethod = "getHiveVarchar";
       getValueMethod = ".getValue().getBytes()";
@@ -1450,7 +1450,7 @@ private void generateFilterColumnBetweenDynamicValue(String[] tdesc) throws Exce
       // Special case - Date requires its own specific BetweenDynamicValue class, but derives from FilterLongColumnBetween
       typeName = "Long";
     } else if (operandType.equals("timestamp")) {
-      defaultValue = "null";
+      defaultValue = "new Timestamp(0)";
       vectorType = "Timestamp";
       getPrimitiveMethod = "getTimestamp";
       getValueMethod = "";
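
Note (illustration, not part of the patch): the GenVectorCode.java hunks above replace the "null" defaultValue for decimal, char, varchar, and timestamp operands with real placeholder instances (HiveDecimal.ZERO, new HiveChar("", 1), new HiveVarchar("", 1), new Timestamp(0)), and the template gains imports for HiveChar and HiveVarchar so the generated Filter...ColumnBetweenDynamicValue classes can reference those defaults. The per-type getValueMethod suffixes (".getStrippedValue().getBytes()" for char, ".getValue().getBytes()" for varchar) are presumably applied to whichever value is current, so a null default would risk a NullPointerException before the runtime DynamicValue min/max has been produced. The sketch below only demonstrates that the new defaults survive those conversions; it is not the generated template code, the class name DynamicValueDefaultSketch is hypothetical, and it assumes hive-common is on the classpath.

// Illustrative sketch only: exercises the new non-null defaults from GenVectorCode.java
// with the same conversions the generator's getValueMethod strings describe.
import java.sql.Timestamp;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;

public class DynamicValueDefaultSketch {
  public static void main(String[] args) {
    HiveChar charDefault = new HiveChar("", 1);          // default for operandType "char"
    HiveVarchar varcharDefault = new HiveVarchar("", 1); // default for operandType "varchar"
    HiveDecimal decimalDefault = HiveDecimal.ZERO;       // default for operandType "decimal"
    Timestamp timestampDefault = new Timestamp(0);       // default for operandType "timestamp"

    // Mirrors ".getStrippedValue().getBytes()" (char) and ".getValue().getBytes()" (varchar);
    // with the old null defaults these dereferences would throw NullPointerException.
    byte[] charBytes = charDefault.getStrippedValue().getBytes();
    byte[] varcharBytes = varcharDefault.getValue().getBytes();

    System.out.println("char default -> " + charBytes.length + " bytes, "
        + "varchar default -> " + varcharBytes.length + " bytes, "
        + "decimal default -> " + decimalDefault + ", "
        + "timestamp default -> " + timestampDefault);
  }
}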