diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index b584c72..0f13d62 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -9,7 +9,6 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\ cbo_rp_subq_in.q,\ cbo_rp_subq_not_in.q,\ cbo_rp_subq_exists.q,\ - orc_llap.q,\ ql_rewrite_gbtoidx_cbo_2.q,\ rcfile_merge1.q,\ stats_filemetadata.q,\ @@ -581,6 +580,7 @@ minillaplocal.query.files=\ non_native_window_udf.q,\ optimize_join_ptp.q,\ orc_analyze.q,\ + orc_llap.q,\ orc_llap_nonvector.q,\ orc_ppd_date.q,\ tez_input_counters.q,\ diff --git ql/src/test/queries/clientpositive/orc_llap.q ql/src/test/queries/clientpositive/orc_llap.q index e2d9080..e487480 100644 --- ql/src/test/queries/clientpositive/orc_llap.q +++ ql/src/test/queries/clientpositive/orc_llap.q @@ -1,6 +1,5 @@ --! qt:dataset:alltypesorc -set hive.vectorized.execution.enabled=false; set hive.mapred.mode=nonstrict; SET hive.vectorized.execution.enabled=true; diff --git ql/src/test/results/clientpositive/llap/orc_llap.q.out ql/src/test/results/clientpositive/llap/orc_llap.q.out new file mode 100644 index 0000000..52c7825 --- /dev/null +++ ql/src/test/results/clientpositive/llap/orc_llap.q.out @@ -0,0 +1,1148 @@ +PREHOOK: query: DROP TABLE cross_numbers +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE cross_numbers +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE orc_llap +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_llap +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE orc_llap_small +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE orc_llap_small +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE orc_llap( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) + STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_llap +POSTHOOK: query: CREATE TABLE orc_llap( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT, + cbigint BIGINT, + cfloat FLOAT, + cdouble DOUBLE, + cstring1 STRING, + cstring2 STRING, + ctimestamp1 TIMESTAMP, + ctimestamp2 TIMESTAMP, + cboolean1 BOOLEAN, + cboolean2 BOOLEAN) + STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_llap +PREHOOK: query: CREATE TABLE orc_llap_small( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT) + STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_llap_small +POSTHOOK: query: CREATE TABLE orc_llap_small( + ctinyint TINYINT, + csmallint SMALLINT, + cint INT) + STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_llap_small +PREHOOK: query: create table cross_numbers(i int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@cross_numbers +POSTHOOK: query: create table cross_numbers(i int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@cross_numbers +PREHOOK: query: insert into table cross_numbers +select distinct csmallint +from alltypesorc where csmallint > 0 order by csmallint limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@cross_numbers +POSTHOOK: query: insert into table cross_numbers +select distinct csmallint +from alltypesorc where csmallint > 0 order by csmallint limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@cross_numbers +POSTHOOK: Lineage: cross_numbers.i EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: insert into table orc_llap +select ctinyint + i, csmallint + i, cint + i, cbigint + i, cfloat + i, cdouble + i, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 +from alltypesorc cross join cross_numbers +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@cross_numbers +PREHOOK: Output: default@orc_llap +POSTHOOK: query: insert into table orc_llap +select ctinyint + i, csmallint + i, cint + i, cbigint + i, cfloat + i, cdouble + i, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 +from alltypesorc cross join cross_numbers +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@cross_numbers +POSTHOOK: Output: default@orc_llap +POSTHOOK: Lineage: orc_llap.cbigint EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_llap.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_llap.cdouble EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.cfloat EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.cint EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.csmallint EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_llap.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ] +POSTHOOK: Lineage: orc_llap.ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_llap.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_llap.ctinyint EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +PREHOOK: query: insert into table orc_llap_small +select ctinyint, csmallint, cint from alltypesorc where ctinyint is not null and cint is not null limit 15 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@orc_llap_small +POSTHOOK: query: insert into table orc_llap_small +select ctinyint, csmallint, cint from alltypesorc where ctinyint is not null and cint is not null limit 15 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@orc_llap_small +POSTHOOK: Lineage: orc_llap_small.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap_small.csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: orc_llap_small.ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain +select count(1) from orc_llap_small y join orc_llap_small x +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(1) from orc_llap_small y join orc_llap_small x +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (XPROD_EDGE), Map 4 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 15 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 15 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 15 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + Statistics: Num rows: 225 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: select count(1) from orc_llap_small y join orc_llap_small x +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap_small +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(1) from orc_llap_small y join orc_llap_small x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap_small +POSTHOOK: Output: hdfs://### HDFS PATH ### +225 +PREHOOK: query: select count(*) from orc_llap_small +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap_small +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from orc_llap_small +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap_small +POSTHOOK: Output: hdfs://### HDFS PATH ### +15 +PREHOOK: query: select count(*) from orc_llap_small where cint < 60000000 +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap_small +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select count(*) from orc_llap_small where cint < 60000000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap_small +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 +PREHOOK: query: explain +select sum(hash(*)) from (select cint, csmallint, cbigint from orc_llap where cint > 10 and cbigint is not null) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select cint, csmallint, cbigint from orc_llap where cint > 10 and cbigint is not null) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + filterExpr: ((cint > 10) and cbigint is not null) (type: boolean) + Statistics: Num rows: 122880 Data size: 1467736 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((cint > 10) and cbigint is not null) (type: boolean) + Statistics: Num rows: 30577 Data size: 365240 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(cint,csmallint,cbigint) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 30577 Data size: 365240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select cint, csmallint, cbigint from orc_llap where cint > 10 and cbigint is not null) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select cint, csmallint, cbigint from orc_llap where cint > 10 and cbigint is not null) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +-558222259686 +PREHOOK: query: explain +select sum(hash(*)) from (select * from orc_llap where cint > 10 and cbigint is not null) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select * from orc_llap where cint > 10 and cbigint is not null) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + filterExpr: ((cint > 10) and cbigint is not null) (type: boolean) + Statistics: Num rows: 122880 Data size: 30929630 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((cint > 10) and cbigint is not null) (type: boolean) + Statistics: Num rows: 30577 Data size: 7696590 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 30577 Data size: 7696590 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select * from orc_llap where cint > 10 and cbigint is not null) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select * from orc_llap where cint > 10 and cbigint is not null) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +-197609091139 +PREHOOK: query: explain +select sum(hash(*)) from (select cstring2 from orc_llap where cint > 5 and cint < 10) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select cstring2 from orc_llap where cint > 5 and cint < 10) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + filterExpr: ((cint > 5) and (cint < 10)) (type: boolean) + Statistics: Num rows: 122880 Data size: 9173100 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((cint < 10) and (cint > 5)) (type: boolean) + Statistics: Num rows: 13653 Data size: 1019300 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(cstring2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 13653 Data size: 1019300 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select cstring2 from orc_llap where cint > 5 and cint < 10) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select cstring2 from orc_llap where cint > 5 and cint < 10) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL +PREHOOK: query: explain +select sum(hash(*)) from (select cstring1, cstring2, count(*) from orc_llap group by cstring1, cstring2) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select cstring1, cstring2, count(*) from orc_llap group by cstring1, cstring2) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + Statistics: Num rows: 122880 Data size: 17429830 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring1 (type: string), cstring2 (type: string) + outputColumnNames: cstring1, cstring2 + Statistics: Num rows: 122880 Data size: 17429830 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: cstring1 (type: string), cstring2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 122880 Data size: 18412870 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 122880 Data size: 18412870 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 122880 Data size: 18412870 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 122880 Data size: 18412870 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select cstring1, cstring2, count(*) from orc_llap group by cstring1, cstring2) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select cstring1, cstring2, count(*) from orc_llap group by cstring1, cstring2) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +-201218541193 +PREHOOK: query: explain +select sum(hash(*)) from (select o1.cstring1, o2.cstring2 from orc_llap o1 inner join orc_llap o2 on o1.csmallint = o2.csmallint where o1.cbigint is not null and o2.cbigint is not null) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select o1.cstring1, o2.cstring2 from orc_llap o1 inner join orc_llap o2 on o1.csmallint = o2.csmallint where o1.cbigint is not null and o2.cbigint is not null) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: o1 + filterExpr: (csmallint is not null and cbigint is not null) (type: boolean) + Statistics: Num rows: 122880 Data size: 9724466 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cbigint is not null and csmallint is not null) (type: boolean) + Statistics: Num rows: 68484 Data size: 5419732 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: csmallint (type: smallint), cstring1 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 68484 Data size: 5419732 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 68484 Data size: 5419732 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: o2 + filterExpr: (csmallint is not null and cbigint is not null) (type: boolean) + Statistics: Num rows: 122880 Data size: 9906988 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cbigint is not null and csmallint is not null) (type: boolean) + Statistics: Num rows: 68484 Data size: 5521416 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: csmallint (type: smallint), cstring2 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 68484 Data size: 5521416 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col2, _col5 + input vertices: + 0 Map 1 + Statistics: Num rows: 304114 Data size: 54483824 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col2,_col5) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 304114 Data size: 54483824 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select o1.cstring1, o2.cstring2 from orc_llap o1 inner join orc_llap o2 on o1.csmallint = o2.csmallint where o1.cbigint is not null and o2.cbigint is not null) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select o1.cstring1, o2.cstring2 from orc_llap o1 inner join orc_llap o2 on o1.csmallint = o2.csmallint where o1.cbigint is not null and o2.cbigint is not null) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +-735462183586256 +Warning: Shuffle Join MERGEJOIN[10][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: insert into table orc_llap +select ctinyint + i, csmallint + i, cint + i, cbigint + i, cfloat + i, cdouble + i, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 +from alltypesorc cross join cross_numbers +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Input: default@cross_numbers +PREHOOK: Output: default@orc_llap +POSTHOOK: query: insert into table orc_llap +select ctinyint + i, csmallint + i, cint + i, cbigint + i, cfloat + i, cdouble + i, cstring1, cstring2, ctimestamp1, ctimestamp2, cboolean1, cboolean2 +from alltypesorc cross join cross_numbers +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Input: default@cross_numbers +POSTHOOK: Output: default@orc_llap +POSTHOOK: Lineage: orc_llap.cbigint EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_llap.cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:null), ] +POSTHOOK: Lineage: orc_llap.cdouble EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.cfloat EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.cint EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.csmallint EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +POSTHOOK: Lineage: orc_llap.cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: orc_llap.cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:null), ] +POSTHOOK: Lineage: orc_llap.ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_llap.ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:null), ] +POSTHOOK: Lineage: orc_llap.ctinyint EXPRESSION [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:null), (cross_numbers)cross_numbers.FieldSchema(name:i, type:int, comment:null), ] +PREHOOK: query: alter table orc_llap concatenate +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_llap +PREHOOK: Output: default@orc_llap +POSTHOOK: query: alter table orc_llap concatenate +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: default@orc_llap +PREHOOK: query: explain +select sum(hash(*)) from (select cint, csmallint, cbigint from orc_llap where cint > 10 and cbigint is not null) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select cint, csmallint, cbigint from orc_llap where cint > 10 and cbigint is not null) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + filterExpr: ((cint > 10) and cbigint is not null) (type: boolean) + Statistics: Num rows: 245760 Data size: 2935456 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((cint > 10) and cbigint is not null) (type: boolean) + Statistics: Num rows: 61153 Data size: 730452 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(cint,csmallint,cbigint) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 61153 Data size: 730452 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select cint, csmallint, cbigint from orc_llap where cint > 10 and cbigint is not null) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select cint, csmallint, cbigint from orc_llap where cint > 10 and cbigint is not null) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +-1116444519372 +PREHOOK: query: explain +select sum(hash(*)) from (select * from orc_llap where cint > 10 and cbigint is not null) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select * from orc_llap where cint > 10 and cbigint is not null) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + filterExpr: ((cint > 10) and cbigint is not null) (type: boolean) + Statistics: Num rows: 245760 Data size: 61859030 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((cint > 10) and cbigint is not null) (type: boolean) + Statistics: Num rows: 61153 Data size: 15392750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 61153 Data size: 15392750 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select * from orc_llap where cint > 10 and cbigint is not null) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select * from orc_llap where cint > 10 and cbigint is not null) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +-395218182278 +PREHOOK: query: explain +select sum(hash(*)) from (select cstring2 from orc_llap where cint > 5 and cint < 10) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select cstring2 from orc_llap where cint > 5 and cint < 10) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + filterExpr: ((cint > 5) and (cint < 10)) (type: boolean) + Statistics: Num rows: 245760 Data size: 18346100 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((cint < 10) and (cint > 5)) (type: boolean) + Statistics: Num rows: 27306 Data size: 2038500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(cstring2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 27306 Data size: 2038500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select cstring2 from orc_llap where cint > 5 and cint < 10) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select cstring2 from orc_llap where cint > 5 and cint < 10) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +NULL +PREHOOK: query: explain +select sum(hash(*)) from (select cstring1, cstring2, count(*) from orc_llap group by cstring1, cstring2) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select cstring1, cstring2, count(*) from orc_llap group by cstring1, cstring2) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_llap + Statistics: Num rows: 245760 Data size: 34859470 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring1 (type: string), cstring2 (type: string) + outputColumnNames: cstring1, cstring2 + Statistics: Num rows: 245760 Data size: 34859470 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: cstring1 (type: string), cstring2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 245760 Data size: 36825550 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 245760 Data size: 36825550 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 245760 Data size: 36825550 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0,_col1,_col2) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 245760 Data size: 36825550 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select cstring1, cstring2, count(*) from orc_llap group by cstring1, cstring2) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select cstring1, cstring2, count(*) from orc_llap group by cstring1, cstring2) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +-201218418313 +PREHOOK: query: explain +select sum(hash(*)) from (select o1.cstring1, o2.cstring2 from orc_llap o1 inner join orc_llap o2 on o1.csmallint = o2.csmallint where o1.cbigint is not null and o2.cbigint is not null) t +PREHOOK: type: QUERY +POSTHOOK: query: explain +select sum(hash(*)) from (select o1.cstring1, o2.cstring2 from orc_llap o1 inner join orc_llap o2 on o1.csmallint = o2.csmallint where o1.cbigint is not null and o2.cbigint is not null) t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: o1 + filterExpr: (csmallint is not null and cbigint is not null) (type: boolean) + Statistics: Num rows: 245760 Data size: 19448826 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cbigint is not null and csmallint is not null) (type: boolean) + Statistics: Num rows: 136968 Data size: 10839366 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: csmallint (type: smallint), cstring1 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 136968 Data size: 10839366 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 136968 Data size: 10839366 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: o2 + filterExpr: (csmallint is not null and cbigint is not null) (type: boolean) + Statistics: Num rows: 245760 Data size: 19813868 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cbigint is not null and csmallint is not null) (type: boolean) + Statistics: Num rows: 136968 Data size: 11042828 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: csmallint (type: smallint), cstring2 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 136968 Data size: 11042828 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 136968 Data size: 11042828 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col2, _col5 + Statistics: Num rows: 1216459 Data size: 224531444 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col2,_col5) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1216459 Data size: 224531444 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from (select o1.cstring1, o2.cstring2 from orc_llap o1 inner join orc_llap o2 on o1.csmallint = o2.csmallint where o1.cbigint is not null and o2.cbigint is not null) t +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_llap +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select sum(hash(*)) from (select o1.cstring1, o2.cstring2 from orc_llap o1 inner join orc_llap o2 on o1.csmallint = o2.csmallint where o1.cbigint is not null and o2.cbigint is not null) t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: hdfs://### HDFS PATH ### +-2941848734345024 +PREHOOK: query: DROP TABLE cross_numbers +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@cross_numbers +PREHOOK: Output: default@cross_numbers +POSTHOOK: query: DROP TABLE cross_numbers +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@cross_numbers +POSTHOOK: Output: default@cross_numbers +PREHOOK: query: DROP TABLE orc_llap +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_llap +PREHOOK: Output: default@orc_llap +POSTHOOK: query: DROP TABLE orc_llap +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_llap +POSTHOOK: Output: default@orc_llap +PREHOOK: query: DROP TABLE orc_llap_small +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@orc_llap_small +PREHOOK: Output: default@orc_llap_small +POSTHOOK: query: DROP TABLE orc_llap_small +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@orc_llap_small +POSTHOOK: Output: default@orc_llap_small