diff --git itests/qtest/pom.xml itests/qtest/pom.xml
index 29988a6..df9e326 100644
--- itests/qtest/pom.xml
+++ itests/qtest/pom.xml
@@ -36,7 +36,7 @@
     false
     false
-    stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q
+    stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q
     cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q
     tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q
     join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q
diff --git ql/src/test/queries/clientpositive/index_bitmap3.q ql/src/test/queries/clientpositive/index_bitmap3.q
index fddc541..e7a093c 100644
--- ql/src/test/queries/clientpositive/index_bitmap3.q
+++ ql/src/test/queries/clientpositive/index_bitmap3.q
@@ -1,3 +1,6 @@
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
+
 EXPLAIN
 CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
 EXPLAIN
@@ -12,16 +15,16 @@ SELECT * FROM default__src_src2_index__ ORDER BY value;
 SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
---        WHERE key = 0) a
---   JOIN
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
---     WHERE value = "val_0") b
---   ON
---     a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+    WHERE value = "val_0") b
+  ON
+    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
 INSERT OVERWRITE DIRECTORY "${system:test.tmp.dir}/index_result"
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
diff --git ql/src/test/queries/clientpositive/index_bitmap_auto.q ql/src/test/queries/clientpositive/index_bitmap_auto.q
index a7ebf1e..56cd44d 100644
--- ql/src/test/queries/clientpositive/index_bitmap_auto.q
+++ ql/src/test/queries/clientpositive/index_bitmap_auto.q
@@ -1,3 +1,6 @@
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
+
 -- try the query without indexing, with manual indexing, and with automatic indexing
 -- without indexing
 SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key;
@@ -18,16 +21,16 @@ SELECT * FROM default__src_src2_index__ ORDER BY value;
 -- manual indexing
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
---        WHERE key = 0) a
---   JOIN
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
---     WHERE value = "val_0") b
---   ON
---     a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+    WHERE value = "val_0") b
+  ON
+    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
 INSERT OVERWRITE DIRECTORY "${system:test.tmp.dir}/index_result"
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
diff --git ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q
index 90f1e77..57e8cc6 100644
--- ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q
+++ ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q
@@ -1,4 +1,5 @@
--- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S, 0.23)
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
 DROP TABLE lineitem;
 CREATE TABLE lineitem (L_ORDERKEY INT,
diff --git ql/src/test/results/clientpositive/index_bitmap3.q.out ql/src/test/results/clientpositive/index_bitmap3.q.out
index 1472af7..cfe3a51 100644
--- ql/src/test/results/clientpositive/index_bitmap3.q.out
+++ ql/src/test/results/clientpositive/index_bitmap3.q.out
@@ -94,16 +94,138 @@ POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldS
 POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
 POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 #### A masked pattern was here ####
-PREHOOK: query: -- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
---        WHERE key = 0) a
---   JOIN
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
---     WHERE value = "val_0") b
---   ON
---     a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+PREHOOK: query: EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+    WHERE value = "val_0") b
+  ON
+    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+    WHERE value = "val_0") b
+  ON
+    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src1_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default__src_src1_index__
+            Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 0) (type: boolean)
+              Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+                  Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: array)
+          TableScan
+            alias: default__src_src2_index__
+            Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (value = 'val_0') (type: boolean)
+              Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+                  Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: array)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+            1 {VALUE._col2}
+          outputColumnNames: _col0, _col1, _col2, _col5
+          Statistics: Num rows: 275 Data size: 25470 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean)
+            Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: bigint)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: collect_set(_col1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: array)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: collect_set(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: array)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
 #### A masked pattern was here ####
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
@@ -119,18 +241,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@default__src_src1_index__
 PREHOOK: Input: default@default__src_src2_index__
 #### A masked pattern was here ####
-POSTHOOK: query: -- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
---        WHERE key = 0) a
---   JOIN
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
---     WHERE value = "val_0") b
---   ON
---     a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
-
-#### A masked pattern was here ####
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
 FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
        WHERE key = 0) a
diff --git ql/src/test/results/clientpositive/index_bitmap_auto.q.out ql/src/test/results/clientpositive/index_bitmap_auto.q.out
index 23ca70e..b194892 100644
--- ql/src/test/results/clientpositive/index_bitmap_auto.q.out
+++ ql/src/test/results/clientpositive/index_bitmap_auto.q.out
@@ -112,16 +112,139 @@ POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchem
 POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 #### A masked pattern was here ####
 PREHOOK: query: -- manual indexing
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
---        WHERE key = 0) a
---   JOIN
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
---     WHERE value = "val_0") b
---   ON
---     a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+    WHERE value = "val_0") b
+  ON
+    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- manual indexing
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+        WHERE key = 0) a
+  JOIN
+    (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+    WHERE value = "val_0") b
+  ON
+    a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src1_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: default__src_src1_index__
+            Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 0) (type: boolean)
+              Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+                  Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: array)
+          TableScan
+            alias: default__src_src2_index__
+            Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (value = 'val_0') (type: boolean)
+              Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: bigint)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+                  Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: array)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+            1 {VALUE._col2}
+          outputColumnNames: _col0, _col1, _col2, _col5
+          Statistics: Num rows: 275 Data size: 25470 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean)
+            Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: bigint)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: collect_set(_col1)
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: array)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: collect_set(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: array)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
 #### A masked pattern was here ####
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
@@ -137,19 +260,6 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@default__src_src1_index__
 PREHOOK: Input: default@default__src_src2_index__
 #### A masked pattern was here ####
-POSTHOOK: query: -- manual indexing
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
---        WHERE key = 0) a
---   JOIN
---     (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
---     WHERE value = "val_0") b
---   ON
---     a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
-
-#### A masked pattern was here ####
 SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
 FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
        WHERE key = 0) a
diff --git ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
index 2f36c4e..84f8d93 100644
--- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
+++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
@@ -261,23 +261,23 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
               outputColumnNames: l_shipdate, _count_of_l_shipdate
-              Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: sum(_count_of_l_shipdate)
                bucketGroup: true
                keys: l_shipdate (type: string)
                mode: hash
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -285,14 +285,14 @@ STAGE PLANS:
           keys: KEY._col0 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: string), _col1 (type: bigint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -597,22 +597,22 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
             Select Operator
              expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
              outputColumnNames: l_shipdate, _count_of_l_shipdate
-              Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: sum(_count_of_l_shipdate)
                keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: int), _col1 (type: int)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                  Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
@@ -620,11 +620,11 @@ STAGE PLANS:
           keys: KEY._col0 (type: int), KEY._col1 (type: int)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
           Select Operator
             expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
               table:
@@ -639,14 +639,14 @@ STAGE PLANS:
            Reduce Output Operator
              key expressions: _col0 (type: int), _col1 (type: int)
              sort order: ++
-              Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
       Reduce Operator Tree:
         Extract
-          Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
-            Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -777,25 +777,25 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: lastyear:default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: (year(l_shipdate) = 1997) (type: boolean)
-              Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
                outputColumnNames: l_shipdate, _count_of_l_shipdate
-                Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: sum(_count_of_l_shipdate)
                  keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: int), _col1 (type: int)
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                    Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col2 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
@@ -803,11 +803,11 @@ STAGE PLANS:
          keys: KEY._col0 (type: int), KEY._col1 (type: int)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: int), _col2 (type: bigint)
            outputColumnNames: _col1, _col2
-            Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              table:
@@ -823,14 +823,14 @@ STAGE PLANS:
              key expressions: _col1 (type: int)
              sort order: +
              Map-reduce partition columns: _col1 (type: int)
-              Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: int), _col2 (type: bigint)
          TableScan
            Reduce Output Operator
              key expressions: _col1 (type: int)
              sort order: +
              Map-reduce partition columns: _col1 (type: int)
-              Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col1 (type: int), _col2 (type: bigint)
      Reduce Operator Tree:
        Join Operator
@@ -840,14 +840,14 @@ STAGE PLANS:
            0 {VALUE._col1} {VALUE._col2}
            1 {VALUE._col1} {VALUE._col2}
          outputColumnNames: _col1, _col2, _col4, _col5
-          Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 25 Data size: 2308 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: int), _col4 (type: int), ((_col5 - _col2) / _col2) (type: decimal(38,19))
            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 25 Data size: 2308 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 25 Data size: 2308 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -858,25 +858,25 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: thisyear:default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
            Filter Operator
              predicate: (year(l_shipdate) = 1998) (type: boolean)
-              Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
                outputColumnNames: l_shipdate, _count_of_l_shipdate
-                Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: sum(_count_of_l_shipdate)
                  keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: int), _col1 (type: int)
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                    Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col2 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
@@ -884,11 +884,11 @@ STAGE PLANS:
          keys: KEY._col0 (type: int), KEY._col1 (type: int)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col1 (type: int), _col2 (type: bigint)
            outputColumnNames: _col1, _col2
-            Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              table:
@@ -927,23 +927,23 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: null-subquery1:default__lineitem_lineitem_lshipdate_idx__
-            Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
              outputColumnNames: l_shipdate, _count_of_l_shipdate
-              Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: sum(_count_of_l_shipdate)
                bucketGroup: true
                keys: l_shipdate (type: string)
                mode: hash
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
@@ -951,11 +951,11 @@ STAGE PLANS:
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: string), _col1 (type: bigint)
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              table:
@@ -968,14 +968,14 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            Union
-              Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: _col0 (type: string), _col1 (type: bigint)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -988,14 +988,14 @@ STAGE PLANS:
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
                Union
-                  Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: _col0 (type: string), _col1 (type: bigint)
                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.TextInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3204,23 +3204,23 @@ STAGE PLANS:
      Map Operator Tree:
          TableScan
            alias: default__tbl_tbl_key_idx__
-            Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
            Select Operator
              expressions: key (type: int), _count_of_key (type: bigint)
              outputColumnNames: key, _count_of_key
-              Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: sum(_count_of_key)
                bucketGroup: true
                keys: key (type: int)
                mode: hash
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
@@ -3228,11 +3228,11 @@ STAGE PLANS:
          keys: KEY._col0 (type: int)
          mode: mergepartial
          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: _col0 (type: int), _col1 (type: bigint)
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              table:
@@ -3247,14 +3247,14 @@ STAGE PLANS:
            Reduce Output Operator
              key expressions: _col0 (type: int)
              sort order: +
-              Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col0 (type: int), _col1 (type: bigint)
      Reduce Operator Tree:
        Extract
-          Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
-            Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat