diff --git itests/qtest/pom.xml itests/qtest/pom.xml
index 29988a6..df9e326 100644
--- itests/qtest/pom.xml
+++ itests/qtest/pom.xml
@@ -36,7 +36,7 @@
false
false
- stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q
+ stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q,root_dir_external_table.q,index_bitmap3.q,ql_rewrite_gbtoidx.q,index_bitmap_auto.q
cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q
tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q
join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q
diff --git ql/src/test/queries/clientpositive/index_bitmap3.q ql/src/test/queries/clientpositive/index_bitmap3.q
index fddc541..e7a093c 100644
--- ql/src/test/queries/clientpositive/index_bitmap3.q
+++ ql/src/test/queries/clientpositive/index_bitmap3.q
@@ -1,3 +1,6 @@
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
+
EXPLAIN
CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD;
EXPLAIN
@@ -12,16 +15,16 @@ SELECT * FROM default__src_src2_index__ ORDER BY value;
SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
--- WHERE key = 0) a
--- JOIN
--- (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
--- WHERE value = "val_0") b
--- ON
--- a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+ WHERE key = 0) a
+ JOIN
+ (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+ WHERE value = "val_0") b
+ ON
+ a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
INSERT OVERWRITE DIRECTORY "${system:test.tmp.dir}/index_result"
SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
diff --git ql/src/test/queries/clientpositive/index_bitmap_auto.q ql/src/test/queries/clientpositive/index_bitmap_auto.q
index a7ebf1e..56cd44d 100644
--- ql/src/test/queries/clientpositive/index_bitmap_auto.q
+++ ql/src/test/queries/clientpositive/index_bitmap_auto.q
@@ -1,3 +1,6 @@
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
+
-- try the query without indexing, with manual indexing, and with automatic indexing
-- without indexing
SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key;
@@ -18,16 +21,16 @@ SELECT * FROM default__src_src2_index__ ORDER BY value;
-- manual indexing
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
--- WHERE key = 0) a
--- JOIN
--- (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
--- WHERE value = "val_0") b
--- ON
--- a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+ WHERE key = 0) a
+ JOIN
+ (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+ WHERE value = "val_0") b
+ ON
+ a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
INSERT OVERWRITE DIRECTORY "${system:test.tmp.dir}/index_result"
SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
diff --git ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q
index 90f1e77..57e8cc6 100644
--- ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q
+++ ql/src/test/queries/clientpositive/ql_rewrite_gbtoidx.q
@@ -1,4 +1,5 @@
--- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20, 0.20S, 0.23)
+set hive.stats.dbclass=counter;
+set hive.stats.autogather=true;
DROP TABLE lineitem;
CREATE TABLE lineitem (L_ORDERKEY INT,
diff --git ql/src/test/results/clientpositive/index_bitmap3.q.out ql/src/test/results/clientpositive/index_bitmap3.q.out
index 1472af7..cfe3a51 100644
--- ql/src/test/results/clientpositive/index_bitmap3.q.out
+++ ql/src/test/results/clientpositive/index_bitmap3.q.out
@@ -94,16 +94,138 @@ POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldS
POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
#### A masked pattern was here ####
-PREHOOK: query: -- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
--- WHERE key = 0) a
--- JOIN
--- (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
--- WHERE value = "val_0") b
--- ON
--- a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+PREHOOK: query: EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+ WHERE key = 0) a
+ JOIN
+ (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+ WHERE value = "val_0") b
+ ON
+ a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+ WHERE key = 0) a
+ JOIN
+ (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+ WHERE value = "val_0") b
+ ON
+ a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src1_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: default__src_src1_index__
+ Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = 0) (type: boolean)
+ Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array<bigint>)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: array<bigint>)
+ TableScan
+ alias: default__src_src2_index__
+ Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value = 'val_0') (type: boolean)
+ Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array<bigint>)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: array<bigint>)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+ 1 {VALUE._col2}
+ outputColumnNames: _col0, _col1, _col2, _col5
+ Statistics: Num rows: 275 Data size: 25470 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean)
+ Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: collect_set(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: array<bigint>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: collect_set(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: array<bigint>)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
#### A masked pattern was here ####
SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
@@ -119,18 +241,6 @@ PREHOOK: type: QUERY
PREHOOK: Input: default@default__src_src1_index__
PREHOOK: Input: default@default__src_src2_index__
#### A masked pattern was here ####
-POSTHOOK: query: -- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
--- WHERE key = 0) a
--- JOIN
--- (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
--- WHERE value = "val_0") b
--- ON
--- a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
-
-#### A masked pattern was here ####
SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
WHERE key = 0) a
diff --git ql/src/test/results/clientpositive/index_bitmap_auto.q.out ql/src/test/results/clientpositive/index_bitmap_auto.q.out
index 23ca70e..b194892 100644
--- ql/src/test/results/clientpositive/index_bitmap_auto.q.out
+++ ql/src/test/results/clientpositive/index_bitmap_auto.q.out
@@ -112,16 +112,139 @@ POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchem
POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
#### A masked pattern was here ####
PREHOOK: query: -- manual indexing
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
--- WHERE key = 0) a
--- JOIN
--- (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
--- WHERE value = "val_0") b
--- ON
--- a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+ WHERE key = 0) a
+ JOIN
+ (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+ WHERE value = "val_0") b
+ ON
+ a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+PREHOOK: type: QUERY
+POSTHOOK: query: -- manual indexing
+EXPLAIN
+SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
+FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
+ WHERE key = 0) a
+ JOIN
+ (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
+ WHERE value = "val_0") b
+ ON
+ a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
+EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src1_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src1_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__src_src2_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src2_index__.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: default__src_src1_index__
+ Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (key = 0) (type: boolean)
+ Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array<bigint>)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 250 Data size: 23155 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: array<bigint>)
+ TableScan
+ alias: default__src_src2_index__
+ Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (value = 'val_0') (type: boolean)
+ Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array<bigint>)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: bigint)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+ Statistics: Num rows: 250 Data size: 24155 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: array<bigint>)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+ 1 {VALUE._col2}
+ outputColumnNames: _col0, _col1, _col2, _col5
+ Statistics: Num rows: 275 Data size: 25470 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean)
+ Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: collect_set(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 138 Data size: 12781 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: array<bigint>)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: collect_set(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: array<bigint>)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 69 Data size: 6390 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
#### A masked pattern was here ####
SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
@@ -137,19 +260,6 @@ PREHOOK: type: QUERY
PREHOOK: Input: default@default__src_src1_index__
PREHOOK: Input: default@default__src_src2_index__
#### A masked pattern was here ####
-POSTHOOK: query: -- manual indexing
--- EXPLAIN
--- SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
--- FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
--- WHERE key = 0) a
--- JOIN
--- (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__
--- WHERE value = "val_0") b
--- ON
--- a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT
--- EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname;
-
-#### A masked pattern was here ####
SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets`
FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__
WHERE key = 0) a
diff --git ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
index 2f36c4e..84f8d93 100644
--- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
+++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out
@@ -261,23 +261,23 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default__lineitem_lineitem_lshipdate_idx__
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
outputColumnNames: l_shipdate, _count_of_l_shipdate
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_l_shipdate)
bucketGroup: true
keys: l_shipdate (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -285,14 +285,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -597,22 +597,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default__lineitem_lineitem_lshipdate_idx__
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
outputColumnNames: l_shipdate, _count_of_l_shipdate
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_l_shipdate)
keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -620,11 +620,11 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -639,14 +639,14 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint)
Reduce Operator Tree:
Extract
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -777,25 +777,25 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: lastyear:default__lineitem_lineitem_lshipdate_idx__
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (year(l_shipdate) = 1997) (type: boolean)
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
outputColumnNames: l_shipdate, _count_of_l_shipdate
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_l_shipdate)
keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -803,11 +803,11 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: int), _col2 (type: bigint)
outputColumnNames: _col1, _col2
- Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -823,14 +823,14 @@ STAGE PLANS:
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: bigint)
TableScan
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: bigint)
Reduce Operator Tree:
Join Operator
@@ -840,14 +840,14 @@ STAGE PLANS:
0 {VALUE._col1} {VALUE._col2}
1 {VALUE._col1} {VALUE._col2}
outputColumnNames: _col1, _col2, _col4, _col5
- Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 2308 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: int), _col4 (type: int), ((_col5 - _col2) / _col2) (type: decimal(38,19))
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 2308 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 25 Data size: 2966 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 25 Data size: 2308 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -858,25 +858,25 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: thisyear:default__lineitem_lineitem_lshipdate_idx__
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (year(l_shipdate) = 1998) (type: boolean)
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
outputColumnNames: l_shipdate, _count_of_l_shipdate
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_l_shipdate)
keys: year(l_shipdate) (type: int), month(l_shipdate) (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -884,11 +884,11 @@ STAGE PLANS:
keys: KEY._col0 (type: int), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: int), _col2 (type: bigint)
outputColumnNames: _col1, _col2
- Statistics: Num rows: 23 Data size: 2697 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 23 Data size: 2099 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -927,23 +927,23 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: null-subquery1:default__lineitem_lineitem_lshipdate_idx__
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: l_shipdate (type: string), _count_of_l_shipdate (type: bigint)
outputColumnNames: l_shipdate, _count_of_l_shipdate
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_l_shipdate)
bucketGroup: true
keys: l_shipdate (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 95 Data size: 11145 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 8675 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -951,11 +951,11 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 47 Data size: 5513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 47 Data size: 4291 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -968,14 +968,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Union
- Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -988,14 +988,14 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 116 Data size: 12099 Basic stats: COMPLETE Column stats: NONE
Union
- Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 163 Data size: 17612 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 163 Data size: 16390 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -3204,23 +3204,23 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: default__tbl_tbl_key_idx__
- Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), _count_of_key (type: bigint)
outputColumnNames: key, _count_of_key
- Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_count_of_key)
bucketGroup: true
keys: key (type: int)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6 Data size: 586 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 430 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -3228,11 +3228,11 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: bigint)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -3247,14 +3247,14 @@ STAGE PLANS:
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: int), _col1 (type: bigint)
Reduce Operator Tree:
Extract
- Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 3 Data size: 293 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 215 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat