diff --git ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
index cdd934c..a51f258 100644
--- ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
+++ ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out
@@ -237,7 +237,7 @@
 min -64
 max 62
 num_nulls 3115
-distinct_count 130
+distinct_count 127
 avg_col_len
 max_col_len
 num_trues
diff --git ql/src/test/results/clientpositive/groupby_join_pushdown.q.out ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
index eed2f65..320ee4d 100644
--- ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
+++ ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
@@ -889,13 +889,13 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: tinyint)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -904,7 +904,7 @@
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -921,7 +921,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: tinyint)
           TableScan
             Reduce Output Operator
@@ -929,7 +929,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -938,14 +938,14 @@
             0 _col0 (type: tinyint)
             1 _col0 (type: tinyint)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 100 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 97 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col0 (type: tinyint), _col2 (type: tinyint), _col1 (type: tinyint)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 100 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 97 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 100 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 97 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -970,20 +970,20 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
       Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1038,13 +1038,13 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: int)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -1053,7 +1053,7 @@
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1070,7 +1070,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: int)
           TableScan
             Reduce Output Operator
@@ -1078,7 +1078,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
       Reduce Operator Tree:
         Join Operator
           condition map:
@@ -1087,14 +1087,14 @@
             0 _col0 (type: tinyint)
             1 _col0 (type: tinyint)
           outputColumnNames: _col1
-          Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col1 (type: int)
             outputColumnNames: _col0
-            Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1119,20 +1119,20 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0
-                Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
       Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0
-          Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1187,13 +1187,13 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -1202,7 +1202,7 @@
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1219,7 +1219,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: bigint)
           TableScan
             Reduce Output Operator
@@ -1227,7 +1227,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Join Operator
@@ -1237,14 +1237,14 @@
             0 _col0 (type: tinyint)
             1 _col0 (type: tinyint)
           outputColumnNames: _col1, _col3
-          Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 97 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: (_col1 * _col3) (type: bigint)
             outputColumnNames: _col0
-            Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 97 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 97 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1270,13 +1270,13 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -1285,7 +1285,7 @@
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1340,13 +1340,13 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -1355,7 +1355,7 @@
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1372,7 +1372,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: bigint)
           TableScan
             Reduce Output Operator
@@ -1380,7 +1380,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Join Operator
@@ -1390,14 +1390,14 @@
             0 _col0 (type: tinyint)
             1 _col0 (type: tinyint)
           outputColumnNames: _col0, _col1, _col3
-          Statistics: Num rows: 100 Data size: 1904 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 97 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 100 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 100 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1423,13 +1423,13 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -1438,7 +1438,7 @@
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1493,13 +1493,13 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -1508,7 +1508,7 @@
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1525,7 +1525,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: bigint)
           TableScan
             Reduce Output Operator
@@ -1533,7 +1533,7 @@
               null sort order: z
               sort order: +
               Map-reduce partition columns: _col0 (type: tinyint)
-              Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col1 (type: bigint)
       Reduce Operator Tree:
         Join Operator
@@ -1543,14 +1543,14 @@
             0 _col0 (type: tinyint)
             1 _col0 (type: tinyint)
           outputColumnNames: _col0, _col1, _col3
-          Statistics: Num rows: 100 Data size: 1904 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 97 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 100 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 100 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1576,13 +1576,13 @@
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
                   Map-reduce partition columns: _col0 (type: tinyint)
-                  Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -1591,7 +1591,7 @@
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
             compressed: false
             table:
@@ -1667,14 +1667,14 @@
             0 _col0 (type: tinyint)
             1 _col0 (type: tinyint)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1161499 Data size: 13900620 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1188936 Data size: 14229864 Basic stats: COMPLETE Column stats: COMPLETE
           Group By Operator
             aggregations: sum(_col1)
             keys: _col0 (type: tinyint), _col2 (type: tinyint)
             minReductionHashAggr: 0.99
             mode: hash
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 17161 Data size: 274216 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 16384 Data size: 261808 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              table:
@@ -1691,7 +1691,7 @@
               null sort order: zz
               sort order: ++
               Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint)
-              Statistics: Num rows: 17161 Data size: 274216 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 16384 Data size: 261808 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col2 (type: bigint)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -1700,14 +1700,14 @@
           keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 17161 Data size: 274216 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 16384 Data size: 261808 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col2 (type: bigint), _col0 (type: tinyint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 17161 Data size: 205752 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 16384 Data size: 196440 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 17161 Data size: 205752 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 16384 Data size: 196440 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1793,14 +1793,14 @@
             0 _col0 (type: tinyint)
             1 _col0 (type: tinyint)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 858611 Data size: 10275444 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 885725 Data size: 10600812 Basic stats: COMPLETE Column stats: COMPLETE
           Group By Operator
             aggregations: sum(_col1)
             keys: _col0 (type: tinyint), _col2 (type: tinyint)
             minReductionHashAggr: 0.99
             mode: hash
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 9801 Data size: 156608 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 9216 Data size: 147272 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              table:
@@ -1817,7 +1817,7 @@
               null sort order: zz
               sort order: ++
               Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint)
-              Statistics: Num rows: 9801 Data size: 156608 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 9216 Data size: 147272 Basic stats: COMPLETE Column stats: COMPLETE
               value expressions: _col2 (type: bigint)
       Execution mode: vectorized
       Reduce Operator Tree:
@@ -1826,14 +1826,14 @@
           keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 9801 Data size: 156608 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 9216 Data size: 147272 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: _col2 (type: bigint), _col0 (type: tinyint)
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 9801 Data size: 117508 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 9216 Data size: 110500 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 9801 Data size: 117508 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 9216 Data size: 110500 Basic stats: COMPLETE Column stats: COMPLETE
              table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
index 1a05333..a81a79f 100644
--- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
+++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
@@ -192,7 +192,7 @@
                     keys:
                       0 _col0 (type: int)
                       1 _col0 (type: int)
-                    Statistics: Num rows: 221 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 220 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       aggregations: count()
                       minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
index 85d2e19..06cd5c2 100644
--- ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
+++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
@@ -1253,10 +1253,10 @@
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1336,10 +1336,10 @@
                   outputColumnNames: _col0, _col1
                   input vertices:
                     0 Reducer 2
-                  Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1464,14 +1464,14 @@
                   0 _col1 (type: double)
                   1 _col1 (type: double)
                 outputColumnNames: _col0, _col2
-                Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: string), _col2 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1580,14 +1580,14 @@
                   0 _col1 (type: double)
                   1 _col1 (type: double)
                 outputColumnNames: _col0, _col2
-                Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: string), _col2 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out
index 36bd120..925b400 100644
--- ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out
+++ ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out
@@ -627,7 +627,7 @@
 Stage-1
   Map 1 llap
   File Output Operator [FS_10]
-    Merge Join Operator [MERGEJOIN_25] (rows=401/480 width=95)
+    Merge Join Operator [MERGEJOIN_25] (rows=382/480 width=95)
      Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"]
     <-Select Operator [SEL_5] (rows=242/242 width=4)
         Output:["_col0"]
@@ -668,48 +668,41 @@

 Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
-Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)

 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 3 llap
+      Reducer 2 llap
       File Output Operator [FS_16]
-        Merge Join Operator [MERGEJOIN_46] (rows=633/1166 width=95)
-        Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0","_col1"]
-      <-Map 5 [SIMPLE_EDGE] llap
+        Merge Join Operator [MERGEJOIN_47] (rows=604/1166 width=95)
+        Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col1"]
+      <-Map 1 [SIMPLE_EDGE] llap
+        SHUFFLE [RS_12]
+          PartitionCols:_col1
+          Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95)
+          Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"]
+        <-Select Operator [SEL_5] (rows=242/242 width=4)
+            Output:["_col0"]
+            Filter Operator [FIL_23] (rows=242/242 width=4)
+              predicate:key is not null
+              TableScan [TS_3] (rows=242/242 width=4)
+                default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+        <-Select Operator [SEL_2] (rows=242/242 width=95)
+            Output:["_col0","_col1"]
+            Filter Operator [FIL_22] (rows=242/242 width=95)
+              predicate:(key is not null and value is not null)
+              TableScan [TS_0] (rows=242/242 width=95)
+                default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+      <-Map 4 [SIMPLE_EDGE] llap
         SHUFFLE [RS_13]
           PartitionCols:_col0
-          Select Operator [SEL_8] (rows=242/242 width=4)
+          Select Operator [SEL_8] (rows=242/242 width=91)
             Output:["_col0"]
-            Filter Operator [FIL_24] (rows=242/242 width=4)
-              predicate:key is not null
-              TableScan [TS_6] (rows=242/242 width=4)
-                default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-      <-Reducer 2 [SIMPLE_EDGE] llap
-        SHUFFLE [RS_12]
-          PartitionCols:_col0
-          Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95)
-          Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col1"]
-        <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_9]
-            PartitionCols:_col1
-            Select Operator [SEL_2] (rows=242/242 width=95)
-              Output:["_col0","_col1"]
-              Filter Operator [FIL_22] (rows=242/242 width=95)
-                predicate:(key is not null and value is not null)
-                TableScan [TS_0] (rows=242/242 width=95)
-                  default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-        <-Map 4 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_10]
-            PartitionCols:_col0
-            Select Operator [SEL_5] (rows=242/242 width=91)
-              Output:["_col0"]
-              Filter Operator [FIL_23] (rows=242/242 width=91)
-                predicate:value is not null
-                TableScan [TS_3] (rows=242/242 width=91)
-                  default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"]
+          Filter Operator [FIL_24] (rows=242/242 width=91)
+            predicate:value is not null
+            TableScan [TS_6] (rows=242/242 width=91)
+              default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"]

 PREHOOK: query: select s1.key as key, s1.value as value from tab_n6 s1 join tab2_n3 s3 on s1.key=s3.key
 PREHOOK: type: QUERY
@@ -749,7 +742,7 @@
 Stage-1
   Map 1 llap
   File Output Operator [FS_10]
-    Merge Join Operator [MERGEJOIN_25] (rows=401/480 width=95)
+    Merge Join Operator [MERGEJOIN_25] (rows=382/480 width=95)
      Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"]
     <-Select Operator [SEL_5] (rows=242/242 width=4)
         Output:["_col0"]
@@ -798,48 +791,41 @@

 Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
-Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)

 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 3 llap
+      Reducer 2 llap
       File Output Operator [FS_16]
-        Merge Join Operator [MERGEJOIN_46] (rows=633/1166 width=95)
-        Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0","_col1"]
-      <-Map 5 [SIMPLE_EDGE] llap
+        Merge Join Operator [MERGEJOIN_47] (rows=604/1166 width=95)
+        Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col1"]
+      <-Map 1 [SIMPLE_EDGE] llap
+        SHUFFLE [RS_12]
+          PartitionCols:_col1
+          Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95)
+          Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"]
+        <-Select Operator [SEL_5] (rows=242/242 width=4)
+            Output:["_col0"]
+            Filter Operator [FIL_23] (rows=242/242 width=4)
+              predicate:key is not null
+              TableScan [TS_3] (rows=242/242 width=4)
+                default@tab2_n3,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+        <-Select Operator [SEL_2] (rows=242/242 width=95)
+            Output:["_col0","_col1"]
+            Filter Operator [FIL_22] (rows=242/242 width=95)
+              predicate:(key is not null and value is not null)
+              TableScan [TS_0] (rows=242/242 width=95)
+                default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+      <-Map 4 [SIMPLE_EDGE] llap
         SHUFFLE [RS_13]
           PartitionCols:_col0
-          Select Operator [SEL_8] (rows=242/242 width=4)
+          Select Operator [SEL_8] (rows=242/242 width=91)
            Output:["_col0"]
-            Filter Operator [FIL_24] (rows=242/242 width=4)
-              predicate:key is not null
-              TableScan [TS_6] (rows=242/242 width=4)
-                default@tab2_n3,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-      <-Reducer 2 [SIMPLE_EDGE] llap
-        SHUFFLE [RS_12]
-          PartitionCols:_col0
-          Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95)
-          Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col1"]
-        <-Map 1 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_9]
-            PartitionCols:_col1
-            Select Operator [SEL_2] (rows=242/242 width=95)
-              Output:["_col0","_col1"]
-              Filter Operator [FIL_42] (rows=242/242 width=95)
-              predicate:(key is not null and value is not null)
-                TableScan [TS_0] (rows=242/242 width=95)
-                  default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-        <-Map 4 [SIMPLE_EDGE] llap
-          SHUFFLE [RS_10]
-            PartitionCols:_col0
-            Select Operator [SEL_5] (rows=242/242 width=91)
-              Output:["_col0"]
-              Filter Operator [FIL_23] (rows=242/242 width=91)
-                predicate:value is not null
-                TableScan [TS_3] (rows=242/242 width=91)
-                  default@tab2_n3,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"]
+          Filter Operator [FIL_24] (rows=242/242 width=91)
+            predicate:value is not null
+            TableScan [TS_6] (rows=242/242 width=91)
+              default@tab2_n3,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"]

 PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab_n6 s1 join tab_n6 s3 on s1.key=s3.key
 UNION ALL
@@ -901,7 +887,7 @@
           Output:["_col0"],aggregations:["count()"]
         <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap
           PARTITION_ONLY_SHUFFLE [RS_22]
-            Merge Join Operator [MERGEJOIN_60] (rows=1061/1646 width=8)
+            Merge Join Operator [MERGEJOIN_60] (rows=1029/1646 width=8)
              Conds:Union 2._col0=RS_19._col0(Inner)
             <-Map 7 [SIMPLE_EDGE] llap
               SHUFFLE [RS_19]
@@ -916,7 +902,7 @@
             <-Map 1 [CONTAINS] llap
               Reduce Output Operator [RS_70]
                 PartitionCols:_col0
-                Merge Join Operator [MERGEJOIN_67] (rows=401/480 width=4)
+                Merge Join Operator [MERGEJOIN_67] (rows=382/480 width=4)
                  Conds:SEL_65._col0=SEL_5._col0(Inner),Output:["_col0"]
                 <-Select Operator [SEL_5] (rows=242/242 width=4)
                     Output:["_col0"]
@@ -985,25 +971,24 @@
 Plan optimized by CBO.

 Vertex dependency in root stage
-Map 9 <- Union 4 (CONTAINS)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
-Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
-Reducer 5 <- Map 10 (SIMPLE_EDGE), Union 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+Map 8 <- Union 3 (CONTAINS)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS)
+Reducer 4 <- Map 9 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)

 Stage-0
   Fetch Operator
     limit:-1
     Stage-1
-      Reducer 6 llap
+      Reducer 5 llap
       File Output Operator [FS_31]
         Group By Operator [GBY_29] (rows=1/1 width=8)
           Output:["_col0"],aggregations:["count()"]
-        <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap
+        <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap
           PARTITION_ONLY_SHUFFLE [RS_28]
-            Merge Join Operator [MERGEJOIN_81] (rows=1443/3768 width=8)
-            Conds:Union 4._col0=RS_25._col0(Inner)
-            <-Map 10 [SIMPLE_EDGE] llap
+            Merge Join Operator [MERGEJOIN_82] (rows=1396/3768 width=8)
+            Conds:Union 3._col0=RS_25._col0(Inner)
+            <-Map 9 [SIMPLE_EDGE] llap
              SHUFFLE [RS_25]
                PartitionCols:_col0
                Select Operator [SEL_23] (rows=500/500 width=4)
@@ -1012,53 +997,47 @@
                    predicate:key is not null
                    TableScan [TS_21] (rows=500/500 width=4)
                      default@tab_part_n7,b_n10,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-            <-Union 4 [SIMPLE_EDGE]
-              <-Map 9 [CONTAINS] llap
-                Reduce Output Operator [RS_89]
+            <-Union 3 [SIMPLE_EDGE]
+              <-Map 8 [CONTAINS] llap
+                Reduce Output Operator [RS_90]
                   PartitionCols:_col0
-                  Select Operator [SEL_87] (rows=242/242 width=4)
+                  Select Operator [SEL_88] (rows=242/242 width=4)
                     Output:["_col0"]
-                    Filter Operator [FIL_86] (rows=242/242 width=4)
+                    Filter Operator [FIL_87] (rows=242/242 width=4)
                       predicate:key is not null
-                      TableScan [TS_85] (rows=242/242 width=4)
+                      TableScan [TS_86] (rows=242/242 width=4)
                         Output:["key"]
-              <-Reducer 3 [CONTAINS] llap
-                Reduce Output Operator [RS_84]
+              <-Reducer 2 [CONTAINS] llap
+                Reduce Output Operator [RS_85]
                   PartitionCols:_col0
-                  Merge Join Operator [MERGEJOIN_82] (rows=633/1166 width=4)
-                  Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0"]
-                <-Map 8 [SIMPLE_EDGE] llap
+                  Merge Join Operator [MERGEJOIN_83] (rows=604/1166 width=4)
+                  Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0"]
+                <-Map 1 [SIMPLE_EDGE] llap
+                  SHUFFLE [RS_12]
+                    PartitionCols:_col1
+                    Merge Join Operator [MERGEJOIN_79] (rows=382/480 width=95)
+                    Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"]
+                  <-Select Operator [SEL_5] (rows=242/242 width=4)
+                      Output:["_col0"]
+                      Filter Operator [FIL_43] (rows=242/242 width=4)
+                        predicate:key is not null
+                        TableScan [TS_3] (rows=242/242 width=4)
+                          default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                  <-Select Operator [SEL_2] (rows=242/242 width=95)
+                      Output:["_col0","_col1"]
+                      Filter Operator [FIL_42] (rows=242/242 width=95)
+                        predicate:(key is not null and value is not null)
+                        TableScan [TS_0] (rows=242/242 width=95)
+                          default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                <-Map 7 [SIMPLE_EDGE] llap
                   SHUFFLE [RS_13]
                     PartitionCols:_col0
-                    Select Operator [SEL_8] (rows=242/242 width=4)
+                    Select Operator [SEL_8] (rows=242/242 width=91)
                       Output:["_col0"]
-                      Filter Operator [FIL_44] (rows=242/242 width=4)
-                        predicate:key is not null
-                        TableScan [TS_6] (rows=242/242 width=4)
-                          default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-                <-Reducer 2 [SIMPLE_EDGE] llap
-                  SHUFFLE [RS_12]
-                    PartitionCols:_col0
-                    Merge Join Operator [MERGEJOIN_79] (rows=382/480 width=4)
-                    Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0"]
-                  <-Map 1 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_9]
-                      PartitionCols:_col1
-                      Select Operator [SEL_2] (rows=242/242 width=95)
-                        Output:["_col0","_col1"]
-                        Filter Operator [FIL_42] (rows=242/242 width=95)
-                          predicate:(key is not null and value is not null)
-                          TableScan [TS_0] (rows=242/242 width=95)
-                            default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-                  <-Map 7 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_10]
-                      PartitionCols:_col0
-                      Select Operator [SEL_5] (rows=242/242 width=91)
-                        Output:["_col0"]
-                        Filter Operator [FIL_43] (rows=242/242 width=91)
-                          predicate:value is not null
-                          TableScan [TS_3] (rows=242/242 width=91)
-                            default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"]
+                      Filter Operator [FIL_44] (rows=242/242 width=91)
+                        predicate:value is not null
+                        TableScan [TS_6] (rows=242/242 width=91)
+                          default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"]

 PREHOOK: query: CREATE TABLE a_n14(key STRING, value STRING) STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
diff --git ql/src/test/results/clientpositive/llap/limit_pushdown.q.out ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
index 63e524d..9a4ceee 100644
--- ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
+++ ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
@@ -577,7 +577,7 @@
                 keys: _col0 (type: tinyint)
                 mode: complete
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
@@ -681,7 +681,7 @@
                 keys: _col0 (type: tinyint)
                 mode: complete
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
@@ -789,7 +789,7 @@
                 keys: _col2 (type: tinyint)
                 mode: complete
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
diff --git ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
index 48d75cd..74f137c 100644
--- ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
+++ ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
@@ -628,12 +628,12 @@
                 keys: _col0 (type: tinyint)
                 mode: complete
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
-                  Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE
                   TopN Hash Memory Usage: 0.3
                   value expressions: _col1 (type: bigint)
         Reducer 3
@@ -642,7 +642,7 @@
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Statistics: Num rows: 20 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
@@ -753,12 +753,12 @@
                 keys: _col0 (type: tinyint)
                 mode: complete
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
-                  Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE
                   TopN Hash Memory Usage: 0.3
                   value expressions: _col1 (type: bigint)
         Reducer 3
@@ -767,7 +767,7 @@
              Select Operator
                 expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Statistics: Num rows: 20 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
@@ -882,12 +882,12 @@
                 keys: _col2 (type: tinyint)
                 mode: complete
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: tinyint)
                   null sort order: z
                   sort order: +
-                  Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE
                   TopN Hash Memory Usage: 0.3
                   value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 3
@@ -896,7 +896,7 @@
              Select Operator
                 expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Statistics: Num rows: 20 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE
diff --git ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
index 208646b..37a9819 100644
--- ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
+++ ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
@@ -582,7 +582,7 @@
                 keys: _col0 (type: tinyint)
                 mode: complete
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Offset of rows: 10
@@ -687,7 +687,7 @@
                 keys: _col0 (type: tinyint)
                 mode: complete
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Offset of rows: 10
@@ -796,7 +796,7 @@
                 keys: _col2 (type: tinyint)
                 mode: complete
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Offset of rows: 10
diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
index e74bc44..5b14c91 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
@@ -289,10 +289,10 @@
                          vectorProcessingMode: HASH
                          projectedOutputColumnNums: []
                      keys: ss_ticket_number (type: int)
-                      minReductionHashAggr: 0.915
+                      minReductionHashAggr: 0.918
                      mode: hash
                      outputColumnNames: _col0
-                      Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        null sort order: z
@@ -302,7 +302,7 @@
                            className: VectorReduceSinkLongOperator
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -335,7 +335,7 @@
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  null sort order: z
@@ -344,7 +344,7 @@
                      className: VectorReduceSinkObjectHashOperator
                      native: true
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                  TopN Hash Memory Usage: 0.1
        Reducer 3
            Execution mode: vectorized, llap
@@ -362,7 +362,7 @@
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 20
                  Limit Vectorization:
@@ -495,10 +495,10 @@
                          vectorProcessingMode: HASH
                          projectedOutputColumnNums: []
                      keys: ss_ticket_number (type: int)
-                      minReductionHashAggr: 0.915
+                      minReductionHashAggr: 0.918
                      mode: hash
                      outputColumnNames: _col0
-                      Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        null sort order: z
@@ -508,7 +508,7 @@
                            className: VectorReduceSinkLongOperator
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -540,7 +540,7 @@
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-                Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: min(_col0)
                  Group By Vectorization:
@@ -554,7 +554,7 @@
                  keys: _col0 (type: int)
                  mode: complete
                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 85 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 82 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col1 (type: int)
                    outputColumnNames: _col0
@@ -562,7 +562,7 @@
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [1]
-                    Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      null sort order: z
@@ -571,7 +571,7 @@
                          className: VectorReduceSinkObjectHashOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: vectorized, llap
            Reduce Vectorization:
@@ -588,13 +588,13 @@
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0]
-                Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                  Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
index 606aa62..72b8f87 100644
--- ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
+++ ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out
@@ -64,7 +64,7 @@
                         1 _col0 (type: tinyint)
                       input vertices:
                         1 Map 4
-                      Statistics: Num rows: 1528346 Data size: 12226768 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1564475 Data size: 12515800 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: count()
                         minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/llap/vectorization_div0.q.out ql/src/test/results/clientpositive/llap/vectorization_div0.q.out
index e1218d6..3dda1f1 100644
--- ql/src/test/results/clientpositive/llap/vectorization_div0.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_div0.q.out
@@ -739,12 +739,12 @@
                        native: true
                        predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 2:int, val 500000000), FilterDoubleColGreaterDoubleScalar(col 5:double, val 1.0E9), FilterLongColEqualLongScalar(col 0:tinyint, val 0))
                    predicate: ((cint > 500000000) or (cdouble > 1.0E9D) or (ctinyint = 0Y)) (type: boolean)
-                    Statistics: Num rows: 3378 Data size: 60552 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 3380 Data size: 60576 Basic stats: COMPLETE Column stats: COMPLETE
                    Top N Key Operator
                      sort order: +++++++++
                      keys: cint (type: int), cbigint (type: bigint), ctinyint (type: tinyint), (UDFToDouble(cint) / UDFToDouble((cint - 528534767))) (type: double), (UDFToDouble(cbigint) / UDFToDouble((cbigint - 1018195815L))) (type: double), (UDFToDouble(ctinyint) / UDFToDouble(ctinyint)) (type: double), (cint % (cint - 528534767)) (type: int), (cbigint % (cbigint - 1018195815L)) (type: bigint), (ctinyint % ctinyint) (type: tinyint)
                      null sort order: zzzzzzzzz
-                      Statistics: Num rows: 3378 Data size: 60552 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 3380 Data size: 60576 Basic stats: COMPLETE Column stats: COMPLETE
                      top n: 100
                      Top N Key Vectorization:
                          className: VectorTopNKeyOperator
@@ -758,7 +758,7 @@
                            native: true
                            projectedOutputColumnNums: [2, 3, 0, 17, 19, 21, 18, 24, 14]
                            selectExpressions: DoubleColDivideDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 13:double, CastLongToDouble(col 14:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 14:int) -> 15:double) -> 17:double, DoubleColDivideDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 13:double, CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 14:bigint) -> 15:double) -> 19:double, DoubleColDivideDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 21:double, LongColModuloLongColumn(col 2:int, col 14:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 14:int) -> 18:int, LongColModuloLongColumn(col 3:bigint, col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 14:bigint) -> 24:bigint, LongColModuloLongColumn(col 0:tinyint, col 0:tinyint) -> 14:tinyint
-                        Statistics: Num rows: 3378 Data size: 161792 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 3380 Data size: 161872 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: int), _col7 (type: bigint), _col8 (type: tinyint)
                          null sort order: zzzzzzzzz
@@ -767,7 +767,7 @@
                              className: VectorReduceSinkObjectHashOperator
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 3378 Data size: 161792 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 3380 Data size: 161872 Basic stats: COMPLETE Column stats: COMPLETE
                          TopN Hash Memory Usage: 0.1
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -796,7 +796,7 @@
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
-                Statistics: Num rows: 3378 Data size: 161792 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 3380 Data size: 161872 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 100
                  Limit Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
index a655c16..8f8dcbe 100644
--- ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out
@@ -188,16 +188,16 @@
                    Group By Operator
                      aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
                      keys: _col0 (type: tinyint)
-                      minReductionHashAggr: 0.9893392
+                      minReductionHashAggr: 0.9895833
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: tinyint)
                        null sort order: z
                        sort order: +
                        Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs (cache only)
@@ -224,14 +224,14 @@
                keys: KEY._col0 (type: tinyint)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -530,16 +530,16 @@
                    Group By Operator
                      aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
                      keys: _col0 (type: tinyint)
-                      minReductionHashAggr: 0.9893392
+                      minReductionHashAggr: 0.9895833
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: tinyint)
                        null sort order: z
                        sort order: +
                        Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
            Execution mode: llap
            LLAP IO: all inputs (cache only)
@@ -561,14 +561,14 @@
                keys: KEY._col0 (type: tinyint)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -872,16 +872,16 @@
                    Group By Operator
                      aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
                      keys: _col0 (type: tinyint)
-                      minReductionHashAggr: 0.9893392
+                      minReductionHashAggr: 0.9895833
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: tinyint)
                        null sort order: z
                        sort order: +
                        Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs (cache only)
@@ -908,14 +908,14 @@
                keys: KEY._col0 (type: tinyint)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                    Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 128 Data
size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1262,16 +1262,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -1293,14 +1293,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 36276e1..14f58a6 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -335,10 +335,10 @@ vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -350,7 +350,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 1:double, 2:bigint 
- Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -398,12 +398,12 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: tinyint), (_col1 / _col2) (type: double) null sort order: zz - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -417,7 +417,7 @@ native: true projectedOutputColumnNums: [0, 4] selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 4:double - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) null sort order: zz @@ -427,7 +427,7 @@ keyColumns: 0:tinyint, 4:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -452,7 +452,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -565,10 +565,10 @@ vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -580,7 +580,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumns: 0:tinyint - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs @@ -626,7 +626,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 256 Basic stats: 
COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -814,12 +814,12 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: tinyint), _col1 (type: bigint) null sort order: zz - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -834,7 +834,7 @@ keyColumns: 0:tinyint, 1:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -859,7 +859,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index ca71b6e..e7dba7f 100644 --- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -477,10 +477,10 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out index 395e3e0..8bfe3dc 100644 --- ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out @@ -55,7 +55,7 @@ outputColumnNames: _col3 input vertices: 1 Map 4 - Statistics: Num rows: 1389803 Data size: 11104552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1433691 Data size: 11455656 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3) minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index 152f4f2..762dcbf 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ 
ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -158,16 +158,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -194,14 +194,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out index 1cdecf1..68a672c 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out @@ -242,7 +242,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -253,7 +253,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: 
double), _col2 (type: bigint) Execution mode: vectorized @@ -276,11 +276,11 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE @@ -373,7 +373,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -384,7 +384,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -405,7 +405,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -533,7 +533,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/topnkey_grouping_sets.q.out ql/src/test/results/clientpositive/topnkey_grouping_sets.q.out index 27998ef..bdc2209 100644 --- ql/src/test/results/clientpositive/topnkey_grouping_sets.q.out +++ ql/src/test/results/clientpositive/topnkey_grouping_sets.q.out @@ -257,7 +257,7 @@ 7 8 10 11 10 NULL -NULL 1 +NULL NULL PREHOOK: query: SELECT a, b FROM t_test_grouping_sets GROUP BY a,b GROUPING SETS ((a,b), (a), (b), ()) ORDER BY a LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@t_test_grouping_sets diff --git ql/src/test/results/clientpositive/vector_left_outer_join.q.out ql/src/test/results/clientpositive/vector_left_outer_join.q.out index 6208046..54e951e 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join.q.out @@ -97,7 +97,7 @@ keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) - Statistics: Num rows: 1528346 Data size: 12226768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1564475 Data size: 12515800 Basic stats: COMPLETE Column stats: COMPLETE Group By 
Operator aggregations: count() minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index c121d9d..e6d0569 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -267,7 +267,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -278,7 +278,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -306,11 +306,11 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -334,7 +334,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -360,7 +360,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE @@ -454,7 +454,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -465,7 +465,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine 
mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -492,7 +492,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -627,7 +627,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -651,7 +651,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -677,7 +677,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out index 5aecbe8..4c209bb 100644 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out @@ -338,10 +338,10 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java index d5ac54a..d454e37 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java @@ -104,21 +104,21 @@ /** *
    * Input: 64 bit hashcode
-   * 
+   *
    * |---------w-------------| |------------p'----------|
    * 10101101.......1010101010 10101010101 01010101010101
    *                                       |------p-----|
-   *                                       
+   *
    * Output: 32 bit int
-   * 
+   *
    * |b| |-q'-|  |------------p'----------|
    *  1  010101  01010101010 10101010101010
    *                         |------p-----|
-   *                    
-   * 
+   *
+   *
    * The default values of p', q' and b are 25, 6, 1 (total 32 bits) respectively.
    * This function will return an int encoded in the following format
-   * 
+   *
    * p  - LSB p bits represent the register index
    * p' - LSB p' bits are used for increased accuracy in estimation
    * q' - q' bits after p' are left as such from the hashcode if b = 0 else
@@ -148,8 +148,12 @@
     }
   }
 
-  public int getSize() {
-    return sparseMap.size() + tempListIdx;
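+  /**
+   * The temp list may buffer hashes whose sparse key is already present in
+   * the map, so it is merged (deduplicated) before the size comparison,
+   * rather than counting tempListIdx optimistically.
+   */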
+  public boolean isSizeGreaterThan(int s) {
+    if (sparseMap.size() + tempListIdx > s) {
+      mergeTempListToSparseMap();
+      return sparseMap.size() > s;
+    }
+    return false;
   }
 
   public void merge(HLLRegister hllRegister) {
@@ -195,7 +199,7 @@
       byte lr = entry.getValue(); // this can be a max of 65, never > 127
       if (lr != 0) {
         // should be a no-op for sparse
-        dest.add((long) ((1 << (p + lr - 1)) | idx));
+        dest.add((1 << (p + lr - 1)) | idx);
       }
     }
   }
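The Javadoc above compresses the sparse layout into ASCII art. The same 32-bit format (b = 1 flag bit, q' = 6 bits, p' = 25 index bits, register values derived from trailing zeros exactly as in the sparse-to-dense conversion above) can be sketched as follows. This is a minimal illustration under those assumptions with a hypothetical helper name, not the actual HLLSparseRegister encoding routine; overflow edge cases are elided.

```java
// Minimal sketch of the documented |b|q'|p'| layout; encodeSparse is a
// hypothetical name, not the Hive method.
static int encodeSparse(long hash, int p, int pPrime) {
    // low p' bits of the hash form the sparse register index
    int idx = (int) (hash & ((1L << pPrime) - 1));
    if ((idx >>> p) != 0) {
        return idx; // b = 0: bits p..p'-1 already determine the register value
    }
    // b = 1: the register value depends on bits above p', so store their
    // trailing-zero count (+1, matching the dense conversion) in the q' field
    int ntz = Long.numberOfTrailingZeros(hash >>> pPrime) + 1;
    return 0x80000000 | (ntz << pPrime) | idx;
}
```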
diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index 91a6865..edf587f 100644
--- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -20,7 +20,6 @@
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.Map;
 import java.util.TreeMap;
@@ -30,17 +29,19 @@
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hive.common.util.Murmur3;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * 
  * This is an implementation of the following variants of hyperloglog (HLL)
- * algorithm 
+ * algorithm
  * Original  - Original HLL algorithm from Flajolet et. al from
  *             http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
  * HLLNoBias - Google's implementation of bias correction based on lookup table
  *             http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
  * HLL++     - Google's implementation of HLL++ algorithm that uses SPARSE registers
  *             http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
- * 
+ *
  * Following are the constructor parameters that determines which algorithm is
  * used
  * numRegisterIndexBits - number of LSB hashcode bits to be used as register index.
@@ -194,7 +195,7 @@
     } else if (hashBits <= 64) {
       alphaMM = 0.709f;
     } else {
-      alphaMM = 0.7213f / (float) (1 + 1.079f / m);
+      alphaMM = 0.7213f / (1 + 1.079f / m);
     }
 
     // For efficiency alpha is multiplied by m^2
@@ -258,7 +259,7 @@
 
       // if size of sparse map excess the threshold convert the sparse map to
       // dense register and switch to DENSE encoding
-      if (sparseRegister.getSize() > encodingSwitchThreshold) {
+      if (sparseRegister.isSizeGreaterThan(encodingSwitchThreshold)) {
         encoding = EncodingType.DENSE;
         denseRegister = sparseToDenseRegister(sparseRegister);
         sparseRegister = null;
@@ -386,7 +387,7 @@
   }
 
   private long linearCount(int mVal, long numZeros) {
-    return (long) (Math.round(mVal * Math.log(mVal / ((double) numZeros))));
+    return (Math.round(mVal * Math.log(mVal / ((double) numZeros))));
   }
 
   // refer paper
@@ -459,7 +460,7 @@
       sparseRegister.merge(hll.getHLLSparseRegister());
       // if after merge the sparse switching threshold is exceeded then change
       // to dense encoding
-      if (sparseRegister.getSize() > encodingSwitchThreshold) {
+      if (sparseRegister.isSizeGreaterThan(encodingSwitchThreshold)) {
         encoding = EncodingType.DENSE;
         denseRegister = sparseToDenseRegister(sparseRegister);
         sparseRegister = null;
@@ -481,7 +482,7 @@
 
   /**
    * Reduces the accuracy of the HLL provided to a smaller size
-   * @param p0 
+   * @param p0
    *         - new p size for the new HyperLogLog (smaller or no change)
    * @return reduced (or same) HyperLogLog instance
    */
@@ -661,4 +662,9 @@
     return o instanceof HyperLogLog;
   }
 
+  @VisibleForTesting
+  public int getEncodingSwitchThreshold() {
+    return encodingSwitchThreshold;
+  }
+
 }
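Taken together, the two call-site changes above are the heart of the fix: the old check read `sparseRegister.getSize()`, which counted every buffered temp-list entry even when it duplicated a key already in the sparse map, so repetitive input could trip the SPARSE-to-DENSE switch early and replace an exact count with an estimate. That is what shifts the golden files above from 131 to 128 distinct tinyints. A toy model of the two checks, with hypothetical names and a shrunken threshold, shows the difference:

```java
import java.util.HashMap;
import java.util.Map;

// Toy model of the old vs. new switch check; names, types and the
// threshold are illustrative, not the Hive API.
class SparseSizeDemo {
    static final int THRESHOLD = 4;
    private final Map<Integer, Byte> sparseMap = new HashMap<>();
    private final int[] tempList = new int[8];
    private int tempListIdx = 0;

    void add(int key) {
        if (tempListIdx == tempList.length) {
            mergeTempListToSparseMap();
        }
        tempList[tempListIdx++] = key;
    }

    private void mergeTempListToSparseMap() {
        for (int i = 0; i < tempListIdx; i++) {
            sparseMap.put(tempList[i], (byte) 1); // the map deduplicates keys
        }
        tempListIdx = 0;
    }

    // old behaviour: buffered entries counted even if they duplicate map keys
    boolean oldCheck() {
        return sparseMap.size() + tempListIdx > THRESHOLD;
    }

    // new behaviour: merge first, then compare the deduplicated size
    boolean newCheck() {
        if (sparseMap.size() + tempListIdx > THRESHOLD) {
            mergeTempListToSparseMap();
            return sparseMap.size() > THRESHOLD;
        }
        return false;
    }

    public static void main(String[] args) {
        SparseSizeDemo d = new SparseSizeDemo();
        for (int i = 0; i < 6; i++) {
            d.add(42);                    // one distinct key, six times
        }
        System.out.println(d.oldCheck()); // true  (0 + 6 > 4): premature switch
        System.out.println(d.newCheck()); // false (1 distinct key after merge)
    }
}
```

Merging inside the check also keeps the common path cheap: the merge only runs when the optimistic upper bound already exceeds the threshold.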
diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
index e014fb5..e720ec8 100644
--- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
+++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.common.ndv.hll;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType;
 import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
@@ -49,27 +50,27 @@
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
     double delta4 = threshold * (4*size) / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // new merge
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
-    // valid merge -- register set size gets bigger (also 4k items 
+    // valid merge -- register set size gets bigger (also 4k items)
     hll.merge(hll4);
-    assertEquals((double) 4 * size, (double) hll.count(), delta4);
+    assertEquals((double) 4 * size, hll.count(), delta4);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -95,27 +96,27 @@
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
     double delta4 = threshold * (4*size) / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // new merge
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // valid merge -- register set size gets bigger & dense automatically
     hll.merge(hll4);
-    assertEquals((double) 4 * size, (double) hll.count(), delta4);
+    assertEquals((double) 4 * size, hll.count(), delta4);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -140,27 +141,27 @@
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // sparse-sparse merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // sparse-dense merge
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should convert hll2 to DENSE
     hll2.merge(hll4);
-    assertEquals((double) 2 * size, (double) hll2.count(), delta);
+    assertEquals((double) 2 * size, hll2.count(), delta);
     assertEquals(EncodingType.DENSE, hll2.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -185,27 +186,27 @@
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // sparse-sparse merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // sparse-dense merge
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should convert hll3 to DENSE
     hll3.merge(hll4);
-    assertEquals((double) 2 * size, (double) hll3.count(), delta);
+    assertEquals((double) 2 * size, hll3.count(), delta);
     assertEquals(EncodingType.DENSE, hll3.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -231,27 +232,27 @@
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // sparse-sparse merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // sparse-sparse overload to dense
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should convert hll2 to DENSE
     hll2.merge(hll4);
-    assertEquals((double) 2 * size, (double) hll2.count(), delta);
+    assertEquals((double) 2 * size, hll2.count(), delta);
     assertEquals(EncodingType.DENSE, hll2.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -268,7 +269,7 @@
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
+    assertEquals(size, hll.count(), delta);
   }
 
   @Test
@@ -296,7 +297,7 @@
               .squash(small.getNumRegisterIndexBits());
           assertEquals(small.count(), mush.count(), 0);
           double delta = Math.ceil(small.getStandardError()*size);
-          assertEquals((double) size, (double) mush.count(), delta);
+          assertEquals(size, mush.count(), delta);
         }
       }
     }
@@ -316,7 +317,7 @@
     }
 
     p14HLL.squash(p10HLL.getNumRegisterIndexBits());
-    assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 100.0);
+    assertEquals(size, p14HLL.count(), longRangeTolerance * size / 100.0);
   }
 
   @Test
@@ -333,6 +334,26 @@
     }
 
     p14HLL.squash(p10HLL.getNumRegisterIndexBits());
-    assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 100.0);
+    assertEquals(size, p14HLL.count(), longRangeTolerance * size / 100.0);
   }
+
+  @Test
+  public void testAbleToRetainAccuracyUpToSwitchThreshold() {
+    int maxThreshold = HyperLogLog.builder().setSizeOptimized().build().getEncodingSwitchThreshold();
+    testRetainAccuracy(70);
+    testRetainAccuracy(maxThreshold / 2);
+    testRetainAccuracy(maxThreshold);
+  }
+
+  private void testRetainAccuracy(int numElements) {
+    HyperLogLog h = HyperLogLog.builder().setSizeOptimized().build();
+    assertTrue(numElements <= h.getEncodingSwitchThreshold());
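+    // add the same values on every pass: duplicates must neither trip the
+    // sparse-to-dense switch nor degrade the exact distinct count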
+    for (int ia = 0; ia <= 10; ia++) {
+      for (int i = 1; i <= numElements; i++) {
+        h.addLong(i);
+      }
+    }
+    assertEquals(numElements, h.estimateNumDistinctValues());
+  }
+
 }
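For completeness, the regression is also visible end to end through the public API. This usage sketch mirrors the new test, using only classes and methods that appear in this patch:

```java
import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;

// Sketch (assumes the patched metastore classes are on the classpath): with
// duplicate-heavy input below the switch threshold, the estimator should
// stay SPARSE and report the exact distinct count.
public class NdvDemo {
    public static void main(String[] args) {
        HyperLogLog hll = HyperLogLog.builder().setSizeOptimized().build();
        for (int pass = 0; pass < 10; pass++) {
            for (long v = 1; v <= 70; v++) {
                hll.addLong(v); // the same 70 values on every pass
            }
        }
        // Before the fix the duplicate passes could force a premature switch
        // to DENSE and an approximate answer; after it, this prints 70.
        System.out.println(hll.estimateNumDistinctValues());
    }
}
```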