diff --git ql/src/test/queries/clientpositive/vector_case_when_3.q ql/src/test/queries/clientpositive/vector_case_when_3.q new file mode 100644 index 0000000000..e179784892 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_case_when_3.q @@ -0,0 +1,58 @@ +--! qt:dataset:lineitem +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.vectorized.execution.enabled=true; + +CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt; +CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt; +INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); + +select L_COMMENT from lineitem_test; + +explain vectorization detail +with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10; + +with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10; + +set hive.vectorized.execution.enabled=false; + +with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10; diff --git ql/src/test/results/clientpositive/tez/vector_case_when_3.q.out ql/src/test/results/clientpositive/tez/vector_case_when_3.q.out new file mode 100644 index 0000000000..67d15ac141 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_case_when_3.q.out @@ -0,0 +1,463 @@ +PREHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@lineitem_test_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@lineitem_test_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_comment, type:string, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_commitdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_commitdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_discount SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_discount, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_extendedprice SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_extendedprice, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linenumber SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linenumber, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linestatus SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linestatus, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_orderkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_orderkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_partkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_quantity SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_quantity, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_receiptdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_receiptdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_returnflag SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_returnflag, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipinstruct, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipmode SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipmode, type:char(10), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_suppkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_suppkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_tax SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_tax, type:decimal(10,2), comment:null), ] +lineitem_test_txt.l_orderkey lineitem_test_txt.l_partkey lineitem_test_txt.l_suppkey lineitem_test_txt.l_linenumber lineitem_test_txt.l_quantity lineitem_test_txt.l_extendedprice lineitem_test_txt.l_discount lineitem_test_txt.l_tax lineitem_test_txt.l_returnflag lineitem_test_txt.l_linestatus lineitem_test_txt.l_shipdate lineitem_test_txt.l_commitdate lineitem_test_txt.l_receiptdate lineitem_test_txt.l_shipinstruct lineitem_test_txt.l_shipmode lineitem_test_txt.l_comment +PREHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_commitdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_discount EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_extendedprice EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linenumber EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linestatus EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_orderkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_partkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_quantity EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_receiptdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_returnflag EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipmode EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_suppkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_tax EXPRESSION [] +_col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 +PREHOOK: query: select L_COMMENT from lineitem_test +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select L_COMMENT from lineitem_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +l_comment +egular courts above the +ly final dependencies: slyly bold +riously. regular, express dep +lites. fluffily even de + pending foxes. slyly re +arefully slyly ex +ven requests. deposits breach a +ongside of the furiously brave acco + unusual accounts. eve +nal foxes wake. +y. fluffily pending d +ages nag slyly pending +ges sleep after the caref +- quickly regular packages sleep. idly +ts wake furiously +sts use slyly quickly special instruc +eodolites. fluffily unusual +p furiously special foxes +ss pinto beans wake against th +es. instructions + unusual reques +. slyly special requests haggl +ns haggle carefully ironic deposits. bl +jole. excuses wake carefully alongside of +ithely regula +sleep quickly. req +lithely regular deposits. fluffily + express accounts wake according to the +e slyly final pac +symptotes nag according to the ironic depo + gifts cajole carefully. +ng to the furiously ironic package +gular theodolites +. stealthily bold exc +unusual packages doubt caref +nic accounts. deposits are alon +thely slyly p +ar foxes sleep +, regular tithe +s are carefully against the f + the carefully regular + quickly unti +. silent, unusual deposits boost +ly alongside of + careful courts. special +luffily regular requests. slyly final acco +the final requests. ca +iously ste +s. blithely unusual theodolites am +eodolites. careful +ckages across the slyly silent +he carefully e +heodolites sleep silently pending foxes. ac +yly regular i +quickly ironic fox +ch slyly final, thin platelets. +pending deposits nag even packages. ca + ideas. special, r +bove the even packages. accounts nag carefu +ut the unusual accounts sleep at the bo + regular de + cajole thinly expres + even packages cajole +y unusual packages thrash pinto +se quickly above the even, express reques +ly regular deposit +ultipliers +fully special instructions cajole. furious + requests are unusual, regular pinto +egular dependencies affix ironically along + excuses integrate fluffily +ccounts. deposits use. furiously +oxes are slyly blithely fin +eposits nag special ideas. furiousl +regular epitaphs. carefully even ideas hag +s sleep carefully bold, +final, pending instr + blithely final d +tect regular, speci +nding accounts ca +ggle. carefully pending dependenc +lyly special packag +quickly. fluffily unusual theodolites c +alongside of the deposits. fur +n accounts are. q + packages wake pending accounts. +ckly. slyly +y. pinto beans haggle after the + ironic packages believe blithely a + serve quickly fluffily bold deposi +l accounts sleep across the pack +s cajole. +ep-- carefully reg +e quickly even ideas. furiou +ayers cajole against the furiously +ic requests boost carefully quic +gifts. furiously ironic packages cajole. + pending, regular accounts s +. unusual instructions against + cajole furiously. blithely ironic ideas +NULL +PREHOOK: query: explain vectorization detail +with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain vectorization detail +with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 11211 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2)/DECIMAL_64, 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] + Select Operator + expressions: CASE WHEN ((l_comment like '%quickly%')) THEN ('quickly') ELSE (l_comment) END (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [18] + selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val quicklycol 15:string)(children: SelectStringColLikeStringScalar(col 15:string) -> 17:boolean) -> 18:string + Statistics: Num rows: 101 Data size: 11211 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 18:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: string) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [15] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2)/DECIMAL_64, l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:string, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:string + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: - + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0] + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +name c +quickly 9 +y. pinto beans haggle after the 1 + cajole furiously. blithely ironic ideas 1 + cajole thinly expres 1 +NULL 1 + even packages cajole 1 + excuses integrate fluffily 1 +yly regular i 1 + gifts cajole carefully. 1 + ideas. special, r 1 +PREHOOK: query: with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +name c +quickly 9 +y. pinto beans haggle after the 1 + cajole furiously. blithely ironic ideas 1 + cajole thinly expres 1 +NULL 1 + even packages cajole 1 + excuses integrate fluffily 1 +yly regular i 1 + gifts cajole carefully. 1 + ideas. special, r 1 diff --git ql/src/test/results/clientpositive/vector_case_when_3.q.out ql/src/test/results/clientpositive/vector_case_when_3.q.out new file mode 100644 index 0000000000..f8e0a204f8 --- /dev/null +++ ql/src/test/results/clientpositive/vector_case_when_3.q.out @@ -0,0 +1,445 @@ +PREHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: CREATE TABLE lineitem_test_txt (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY INT, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DECIMAL(10,2), + L_RETURNFLAG CHAR(1), + L_LINESTATUS CHAR(1), + l_shipdate DATE, + L_COMMITDATE DATE, + L_RECEIPTDATE DATE, + L_SHIPINSTRUCT VARCHAR(20), + L_SHIPMODE CHAR(10), + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@lineitem_test_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem_test_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@lineitem_test_txt +PREHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@lineitem_test_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: CREATE TABLE lineitem_test STORED AS ORC AS SELECT * FROM lineitem_test_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@lineitem_test_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_comment, type:string, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_commitdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_commitdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_discount SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_discount, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_extendedprice SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_extendedprice, type:double, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linenumber SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linenumber, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_linestatus SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_linestatus, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_orderkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_orderkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_partkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_partkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_quantity SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_quantity, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_receiptdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_receiptdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_returnflag SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_returnflag, type:char(1), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipdate SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipdate, type:date, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipinstruct, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_shipmode SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_shipmode, type:char(10), comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_suppkey SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_suppkey, type:int, comment:null), ] +POSTHOOK: Lineage: lineitem_test.l_tax SIMPLE [(lineitem_test_txt)lineitem_test_txt.FieldSchema(name:l_tax, type:decimal(10,2), comment:null), ] +lineitem_test_txt.l_orderkey lineitem_test_txt.l_partkey lineitem_test_txt.l_suppkey lineitem_test_txt.l_linenumber lineitem_test_txt.l_quantity lineitem_test_txt.l_extendedprice lineitem_test_txt.l_discount lineitem_test_txt.l_tax lineitem_test_txt.l_returnflag lineitem_test_txt.l_linestatus lineitem_test_txt.l_shipdate lineitem_test_txt.l_commitdate lineitem_test_txt.l_receiptdate lineitem_test_txt.l_shipinstruct lineitem_test_txt.l_shipmode lineitem_test_txt.l_comment +PREHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@lineitem_test +POSTHOOK: query: INSERT INTO TABLE lineitem_test VALUES (NULL,NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@lineitem_test +POSTHOOK: Lineage: lineitem_test.l_comment EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_commitdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_discount EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_extendedprice EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linenumber EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_linestatus EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_orderkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_partkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_quantity EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_receiptdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_returnflag EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipdate EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipinstruct EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_shipmode EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_suppkey EXPRESSION [] +POSTHOOK: Lineage: lineitem_test.l_tax EXPRESSION [] +_col0 _col1 _col2 _col3 _col4 _col5 _col6 _col7 _col8 _col9 _col10 _col11 _col12 _col13 _col14 _col15 +PREHOOK: query: select L_COMMENT from lineitem_test +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: select L_COMMENT from lineitem_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +l_comment +egular courts above the +ly final dependencies: slyly bold +riously. regular, express dep +lites. fluffily even de + pending foxes. slyly re +arefully slyly ex +ven requests. deposits breach a +ongside of the furiously brave acco + unusual accounts. eve +nal foxes wake. +y. fluffily pending d +ages nag slyly pending +ges sleep after the caref +- quickly regular packages sleep. idly +ts wake furiously +sts use slyly quickly special instruc +eodolites. fluffily unusual +p furiously special foxes +ss pinto beans wake against th +es. instructions + unusual reques +. slyly special requests haggl +ns haggle carefully ironic deposits. bl +jole. excuses wake carefully alongside of +ithely regula +sleep quickly. req +lithely regular deposits. fluffily + express accounts wake according to the +e slyly final pac +symptotes nag according to the ironic depo + gifts cajole carefully. +ng to the furiously ironic package +gular theodolites +. stealthily bold exc +unusual packages doubt caref +nic accounts. deposits are alon +thely slyly p +ar foxes sleep +, regular tithe +s are carefully against the f + the carefully regular + quickly unti +. silent, unusual deposits boost +ly alongside of + careful courts. special +luffily regular requests. slyly final acco +the final requests. ca +iously ste +s. blithely unusual theodolites am +eodolites. careful +ckages across the slyly silent +he carefully e +heodolites sleep silently pending foxes. ac +yly regular i +quickly ironic fox +ch slyly final, thin platelets. +pending deposits nag even packages. ca + ideas. special, r +bove the even packages. accounts nag carefu +ut the unusual accounts sleep at the bo + regular de + cajole thinly expres + even packages cajole +y unusual packages thrash pinto +se quickly above the even, express reques +ly regular deposit +ultipliers +fully special instructions cajole. furious + requests are unusual, regular pinto +egular dependencies affix ironically along + excuses integrate fluffily +ccounts. deposits use. furiously +oxes are slyly blithely fin +eposits nag special ideas. furiousl +regular epitaphs. carefully even ideas hag +s sleep carefully bold, +final, pending instr + blithely final d +tect regular, speci +nding accounts ca +ggle. carefully pending dependenc +lyly special packag +quickly. fluffily unusual theodolites c +alongside of the deposits. fur +n accounts are. q + packages wake pending accounts. +ckly. slyly +y. pinto beans haggle after the + ironic packages believe blithely a + serve quickly fluffily bold deposi +l accounts sleep across the pack +s cajole. +ep-- carefully reg +e quickly even ideas. furiou +ayers cajole against the furiously +ic requests boost carefully quic +gifts. furiously ironic packages cajole. + pending, regular accounts s +. unusual instructions against + cajole furiously. blithely ironic ideas +NULL +PREHOOK: query: explain vectorization detail +with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail +with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: lineitem_test + Statistics: Num rows: 101 Data size: 11211 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2)/DECIMAL_64, 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] + Select Operator + expressions: CASE WHEN ((l_comment like '%quickly%')) THEN ('quickly') ELSE (l_comment) END (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [18] + selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val quicklycol 15:string)(children: SelectStringColLikeStringScalar(col 15:string) -> 17:boolean) -> 18:string + Statistics: Num rows: 101 Data size: 11211 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 18:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 16 + includeColumns: [15] + dataColumns: l_orderkey:int, l_partkey:int, l_suppkey:int, l_linenumber:int, l_quantity:int, l_extendedprice:double, l_discount:double, l_tax:decimal(10,2)/DECIMAL_64, l_returnflag:char(1), l_linestatus:char(1), l_shipdate:date, l_commitdate:date, l_receiptdate:date, l_shipinstruct:varchar(20), l_shipmode:char(10), l_comment:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string, 1:_col1:bigint] + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: - + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:string, _col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 50 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +name c +quickly 9 + ideas. special, r 1 + gifts cajole carefully. 1 +yly regular i 1 + excuses integrate fluffily 1 + even packages cajole 1 +NULL 1 + cajole thinly expres 1 + cajole furiously. blithely ironic ideas 1 +y. pinto beans haggle after the 1 +PREHOOK: query: with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +POSTHOOK: query: with test as ( + select (CASE + WHEN L_COMMENT like '%quickly%' THEN 'quickly' + ELSE L_COMMENT + END) AS name from lineitem_test +) +select name, count(*) as c from test group by name order by c desc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem_test +#### A masked pattern was here #### +name c +quickly 9 + ideas. special, r 1 + gifts cajole carefully. 1 +yly regular i 1 + excuses integrate fluffily 1 + even packages cajole 1 +NULL 1 + cajole thinly expres 1 + cajole furiously. blithely ironic ideas 1 +y. pinto beans haggle after the 1