diff --git ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out index cdd934c..a51f258 100644 --- ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out +++ ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -237,7 +237,7 @@ min -64 max 62 num_nulls 3115 -distinct_count 130 +distinct_count 127 avg_col_len max_col_len num_trues diff --git ql/src/test/results/clientpositive/groupby_join_pushdown.q.out ql/src/test/results/clientpositive/groupby_join_pushdown.q.out index eed2f65..320ee4d 100644 --- ql/src/test/results/clientpositive/groupby_join_pushdown.q.out +++ ql/src/test/results/clientpositive/groupby_join_pushdown.q.out @@ -889,13 +889,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint) Execution mode: vectorized Reduce Operator Tree: @@ -904,7 +904,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -921,7 +921,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE 
Column stats: COMPLETE value expressions: _col1 (type: tinyint) TableScan Reduce Output Operator @@ -929,7 +929,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -938,14 +938,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col2 (type: tinyint), _col1 (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -970,20 +970,20 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce 
Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1038,13 +1038,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: vectorized Reduce Operator Tree: @@ -1053,7 +1053,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1070,7 +1070,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) TableScan Reduce Output Operator @@ -1078,7 +1078,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator 
condition map: @@ -1087,14 +1087,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col1 - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1119,20 +1119,20 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 99 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1187,13 +1187,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -1202,7 +1202,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1219,7 +1219,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator @@ -1227,7 +1227,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -1237,14 +1237,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col1, _col3 - Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (_col1 * _col3) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1270,13 +1270,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -1285,7 +1285,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1340,13 +1340,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -1355,7 +1355,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1372,7 +1372,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator @@ -1380,7 +1380,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -1390,14 +1390,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 100 Data size: 1904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 100 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE 
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1423,13 +1423,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -1438,7 +1438,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1493,13 +1493,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -1508,7 +1508,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1525,7 +1525,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator @@ -1533,7 +1533,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator @@ -1543,14 +1543,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 100 Data size: 1904 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 100 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 1104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1576,13 +1576,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -1591,7 +1591,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 99 Data size: 1092 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1667,14 +1667,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1161499 Data size: 13900620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1188936 Data size: 14229864 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: tinyint), _col2 (type: tinyint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17161 Data size: 274216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 261808 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1691,7 +1691,7 @@ null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint) - Statistics: Num rows: 17161 Data size: 274216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 261808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -1700,14 +1700,14 @@ keys: KEY._col0 (type: 
tinyint), KEY._col1 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17161 Data size: 274216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 261808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 17161 Data size: 205752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 196440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17161 Data size: 205752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 196440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1793,14 +1793,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 858611 Data size: 10275444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 885725 Data size: 10600812 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: tinyint), _col2 (type: tinyint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9801 Data size: 156608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 147272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -1817,7 +1817,7 @@ null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint) - Statistics: Num rows: 9801 Data size: 156608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 147272 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) 
Execution mode: vectorized Reduce Operator Tree: @@ -1826,14 +1826,14 @@ keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 9801 Data size: 156608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 147272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 9801 Data size: 117508 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 110500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9801 Data size: 117508 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 110500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out index 1a05333..a81a79f 100644 --- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out +++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out @@ -192,7 +192,7 @@ keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 221 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 220 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index 85d2e19..06cd5c2 100644 --- ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -1253,10 
+1253,10 @@ 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1336,10 +1336,10 @@ outputColumnNames: _col0, _col1 input vertices: 0 Reducer 2 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1464,14 +1464,14 @@ 0 _col1 (type: double) 1 _col1 (type: double) outputColumnNames: _col0, _col2 - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: 
COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1580,14 +1580,14 @@ 0 _col1 (type: double) 1 _col1 (type: double) outputColumnNames: _col0, _col2 - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out index 36bd120..925b400 100644 --- ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out +++ ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out @@ -627,7 +627,7 @@ Stage-1 Map 1 llap File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_25] (rows=401/480 width=95) + Merge Join Operator [MERGEJOIN_25] (rows=382/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] @@ -668,48 +668,41 @@ Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap + Reducer 2 llap File Output Operator [FS_16] - Merge 
Join Operator [MERGEJOIN_46] (rows=633/1166 width=95) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0","_col1"] - <-Map 5 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_47] (rows=604/1166 width=95) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=242/242 width=4) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col0"] - Filter Operator [FIL_24] (rows=242/242 width=4) - predicate:key is not null - TableScan [TS_6] (rows=242/242 width=4) - default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95) - Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=242/242 width=95) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242/242 width=95) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=95) - default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 
- Select Operator [SEL_5] (rows=242/242 width=91) - Output:["_col0"] - Filter Operator [FIL_23] (rows=242/242 width=91) - predicate:value is not null - TableScan [TS_3] (rows=242/242 width=91) - default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] + Filter Operator [FIL_24] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select s1.key as key, s1.value as value from tab_n6 s1 join tab2_n3 s3 on s1.key=s3.key PREHOOK: type: QUERY @@ -749,7 +742,7 @@ Stage-1 Map 1 llap File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_25] (rows=401/480 width=95) + Merge Join Operator [MERGEJOIN_25] (rows=382/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] @@ -798,48 +791,41 @@ Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap + Reducer 2 llap File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_46] (rows=633/1166 width=95) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0","_col1"] - <-Map 5 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_47] (rows=604/1166 width=95) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab2_n3,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_22] 
(rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=242/242 width=4) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col0"] - Filter Operator [FIL_24] (rows=242/242 width=4) - predicate:key is not null - TableScan [TS_6] (rows=242/242 width=4) - default@tab2_n3,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95) - Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=242/242 width=95) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242/242 width=95) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=95) - default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=242/242 width=91) - Output:["_col0"] - Filter Operator [FIL_23] (rows=242/242 width=91) - predicate:value is not null - TableScan [TS_3] (rows=242/242 width=91) - default@tab2_n3,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] + Filter Operator [FIL_24] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab2_n3,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab_n6 s1 join tab_n6 s3 on s1.key=s3.key UNION ALL @@ -901,7 +887,7 @@ Output:["_col0"],aggregations:["count()"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_22] - Merge Join Operator [MERGEJOIN_60] (rows=1061/1646 width=8) + Merge Join Operator [MERGEJOIN_60] 
(rows=1029/1646 width=8) Conds:Union 2._col0=RS_19._col0(Inner) <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_19] @@ -916,7 +902,7 @@ <-Map 1 [CONTAINS] llap Reduce Output Operator [RS_70] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_67] (rows=401/480 width=4) + Merge Join Operator [MERGEJOIN_67] (rows=382/480 width=4) Conds:SEL_65._col0=SEL_5._col0(Inner),Output:["_col0"] <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] @@ -985,25 +971,24 @@ Plan optimized by CBO. Vertex dependency in root stage -Map 9 <- Union 4 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Map 10 (SIMPLE_EDGE), Union 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Map 8 <- Union 3 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 4 <- Map 9 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 llap + Reducer 5 llap File Output Operator [FS_31] Group By Operator [GBY_29] (rows=1/1 width=8) Output:["_col0"],aggregations:["count()"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_28] - Merge Join Operator [MERGEJOIN_81] (rows=1443/3768 width=8) - Conds:Union 4._col0=RS_25._col0(Inner) - <-Map 10 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_82] (rows=1396/3768 width=8) + Conds:Union 3._col0=RS_25._col0(Inner) + <-Map 9 [SIMPLE_EDGE] llap SHUFFLE [RS_25] PartitionCols:_col0 Select Operator [SEL_23] (rows=500/500 width=4) @@ -1012,53 +997,47 @@ predicate:key is not null TableScan [TS_21] (rows=500/500 width=4) default@tab_part_n7,b_n10,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Union 4 [SIMPLE_EDGE] - <-Map 9 [CONTAINS] llap - Reduce Output Operator [RS_89] + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_90] PartitionCols:_col0 - Select 
Operator [SEL_87] (rows=242/242 width=4) + Select Operator [SEL_88] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_86] (rows=242/242 width=4) + Filter Operator [FIL_87] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_85] (rows=242/242 width=4) + TableScan [TS_86] (rows=242/242 width=4) Output:["key"] - <-Reducer 3 [CONTAINS] llap - Reduce Output Operator [RS_84] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_85] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_82] (rows=633/1166 width=4) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0"] - <-Map 8 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_83] (rows=604/1166 width=4) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_79] (rows=382/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_43] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_42] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=242/242 width=4) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col0"] - Filter Operator [FIL_44] (rows=242/242 width=4) - predicate:key is not null - TableScan [TS_6] (rows=242/242 width=4) - default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_79] (rows=382/480 width=4) - 
Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=242/242 width=95) - Output:["_col0","_col1"] - Filter Operator [FIL_42] (rows=242/242 width=95) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=95) - default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=242/242 width=91) - Output:["_col0"] - Filter Operator [FIL_43] (rows=242/242 width=91) - predicate:value is not null - TableScan [TS_3] (rows=242/242 width=91) - default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] + Filter Operator [FIL_44] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: CREATE TABLE a_n14(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/llap/limit_pushdown.q.out ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index 63e524d..9a4ceee 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ -577,7 +577,7 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE @@ -681,7 +681,7 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of 
rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE @@ -789,7 +789,7 @@ keys: _col2 (type: tinyint) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out index 48d75cd..74f137c 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out @@ -628,12 +628,12 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) Reducer 3 @@ -642,7 +642,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE @@ -753,12 +753,12 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) Reducer 3 @@ -767,7 +767,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE @@ -882,12 +882,12 @@ keys: _col2 (type: tinyint) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 @@ -896,7 +896,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 364 Basic stats: 
COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index 208646b..37a9819 100644 --- ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -582,7 +582,7 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -687,7 +687,7 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -796,7 +796,7 @@ keys: _col2 (type: tinyint) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index e74bc44..5b14c91 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -289,10 +289,10 @@ vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) - minReductionHashAggr: 0.915 + minReductionHashAggr: 0.918 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic 
stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -302,7 +302,7 @@ className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -335,7 +335,7 @@ keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -344,7 +344,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -362,7 +362,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -495,10 +495,10 @@ vectorProcessingMode: HASH 
projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) - minReductionHashAggr: 0.915 + minReductionHashAggr: 0.918 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -508,7 +508,7 @@ className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -540,7 +540,7 @@ keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0) Group By Vectorization: @@ -554,7 +554,7 @@ keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 85 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 @@ -562,7 +562,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: 
z @@ -571,7 +571,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -588,13 +588,13 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out index 606aa62..72b8f87 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out @@ -64,7 +64,7 @@ 1 _col0 (type: tinyint) input vertices: 1 Map 4 - Statistics: Num rows: 1528346 Data size: 12226768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1564475 Data size: 12515800 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99 diff --git 
ql/src/test/results/clientpositive/llap/vectorization_div0.q.out ql/src/test/results/clientpositive/llap/vectorization_div0.q.out index e1218d6..3dda1f1 100644 --- ql/src/test/results/clientpositive/llap/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_div0.q.out @@ -739,12 +739,12 @@ native: true predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 2:int, val 500000000), FilterDoubleColGreaterDoubleScalar(col 5:double, val 1.0E9), FilterLongColEqualLongScalar(col 0:tinyint, val 0)) predicate: ((cint > 500000000) or (cdouble > 1.0E9D) or (ctinyint = 0Y)) (type: boolean) - Statistics: Num rows: 3378 Data size: 60552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 60576 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++ keys: cint (type: int), cbigint (type: bigint), ctinyint (type: tinyint), (UDFToDouble(cint) / UDFToDouble((cint - 528534767))) (type: double), (UDFToDouble(cbigint) / UDFToDouble((cbigint - 1018195815L))) (type: double), (UDFToDouble(ctinyint) / UDFToDouble(ctinyint)) (type: double), (cint % (cint - 528534767)) (type: int), (cbigint % (cbigint - 1018195815L)) (type: bigint), (ctinyint % ctinyint) (type: tinyint) null sort order: zzzzzzzzz - Statistics: Num rows: 3378 Data size: 60552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 60576 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -758,7 +758,7 @@ native: true projectedOutputColumnNums: [2, 3, 0, 17, 19, 21, 18, 24, 14] selectExpressions: DoubleColDivideDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 13:double, CastLongToDouble(col 14:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 14:int) -> 15:double) -> 17:double, DoubleColDivideDoubleColumn(col 13:double, col 15:double)(children: 
CastLongToDouble(col 3:bigint) -> 13:double, CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 14:bigint) -> 15:double) -> 19:double, DoubleColDivideDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 21:double, LongColModuloLongColumn(col 2:int, col 14:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 14:int) -> 18:int, LongColModuloLongColumn(col 3:bigint, col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 14:bigint) -> 24:bigint, LongColModuloLongColumn(col 0:tinyint, col 0:tinyint) -> 14:tinyint - Statistics: Num rows: 3378 Data size: 161792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 161872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: int), _col7 (type: bigint), _col8 (type: tinyint) null sort order: zzzzzzzzz @@ -767,7 +767,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3378 Data size: 161792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 161872 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -796,7 +796,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 3378 Data size: 161792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 
161872 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Limit Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out index a655c16..8f8dcbe 100644 --- ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out @@ -188,16 +188,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -224,14 +224,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: 
smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -530,16 +530,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: llap LLAP IO: all inputs (cache only) @@ -561,14 +561,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: 
Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -872,16 +872,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: 
double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -908,14 +908,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1262,16 +1262,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -1293,14 +1293,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 36276e1..14f58a6 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -335,10 +335,10 @@ vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash 
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -350,7 +350,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 1:double, 2:bigint - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -398,12 +398,12 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: tinyint), (_col1 / _col2) (type: double) null sort order: zz - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -417,7 +417,7 @@ native: true projectedOutputColumnNums: [0, 4] selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 4:double - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) null sort order: zz @@ 
-427,7 +427,7 @@ keyColumns: 0:tinyint, 4:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -452,7 +452,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -565,10 +565,10 @@ vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -580,7 +580,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumns: 0:tinyint - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs @@ -626,7 +626,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial 
outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -814,12 +814,12 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: tinyint), _col1 (type: bigint) null sort order: zz - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -834,7 +834,7 @@ keyColumns: 0:tinyint, 1:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -859,7 +859,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index ca71b6e..e7dba7f 100644 --- 
ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -477,10 +477,10 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out index 395e3e0..8bfe3dc 100644 --- ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out @@ -55,7 +55,7 @@ outputColumnNames: _col3 input vertices: 1 Map 4 - Statistics: Num rows: 1389803 Data size: 11104552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1433691 Data size: 11455656 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3) minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index 152f4f2..762dcbf 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -158,16 +158,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 
+ minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -194,14 +194,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out index 1cdecf1..68a672c 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out @@ -242,7 +242,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -253,7 +253,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized @@ -276,11 +276,11 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic 
stats: COMPLETE Column stats: COMPLETE @@ -373,7 +373,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -384,7 +384,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -405,7 +405,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -533,7 +533,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/vector_left_outer_join.q.out ql/src/test/results/clientpositive/vector_left_outer_join.q.out index 6208046..54e951e 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join.q.out +++ 
ql/src/test/results/clientpositive/vector_left_outer_join.q.out @@ -97,7 +97,7 @@ keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) - Statistics: Num rows: 1528346 Data size: 12226768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1564475 Data size: 12515800 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index c121d9d..e6d0569 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -267,7 +267,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -278,7 +278,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -306,11 +306,11 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), (_col1 / _col2) 
(type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -334,7 +334,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -360,7 +360,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE @@ -454,7 +454,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -465,7 +465,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -492,7 +492,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -627,7 +627,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -651,7 +651,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -677,7 +677,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out 
ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out index 5aecbe8..4c209bb 100644 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out @@ -338,10 +338,10 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java index d5ac54a..d454e37 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java @@ -104,21 +104,21 @@ /** *
* Input: 64 bit hashcode
- *
+ *
* |---------w-------------| |------------p'----------|
* 10101101.......1010101010 10101010101 01010101010101
* |------p-----|
- *
+ *
* Output: 32 bit int
- *
+ *
* |b| |-q'-| |------------p'----------|
* 1 010101 01010101010 10101010101010
* |------p-----|
- *
- *
+ *
+ *
* The default values of p', q' and b are 25, 6, 1 (total 32 bits) respectively.
* This function will return an int encoded in the following format
- *
+ *
* p - LSB p bits represent the register index
* p' - LSB p' bits are used for increased accuracy in estimation
* q' - q' bits after p' are left as such from the hashcode if b = 0 else
@@ -148,8 +148,12 @@
}
}
- public int getSize() {
- return sparseMap.size() + tempListIdx;
+ public boolean isSizeGreaterThan(int s) {
+ if (sparseMap.size() + tempListIdx > s) {
+ mergeTempListToSparseMap();
+ return sparseMap.size() > s;
+ }
+ return false;
}
public void merge(HLLRegister hllRegister) {
@@ -195,7 +199,7 @@
byte lr = entry.getValue(); // this can be a max of 65, never > 127
if (lr != 0) {
// should be a no-op for sparse
- dest.add((long) ((1 << (p + lr - 1)) | idx));
+ dest.add((1 << (p + lr - 1)) | idx);
}
}
}
diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index 91a6865..edf587f 100644
--- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -20,7 +20,6 @@
import java.io.ByteArrayOutputStream;
import java.io.IOException;
-import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Map;
import java.util.TreeMap;
@@ -30,17 +29,19 @@
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hive.common.util.Murmur3;
+import com.google.common.annotations.VisibleForTesting;
+
/**
*
* This is an implementation of the following variants of hyperloglog (HLL)
- * algorithm
+ * algorithm
* Original - Original HLL algorithm from Flajolet et. al from
* http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
* HLLNoBias - Google's implementation of bias correction based on lookup table
* http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
* HLL++ - Google's implementation of HLL++ algorithm that uses SPARSE registers
* http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
- *
+ *
* Following are the constructor parameters that determines which algorithm is
* used
* numRegisterIndexBits - number of LSB hashcode bits to be used as register index.
@@ -194,7 +195,7 @@
} else if (hashBits <= 64) {
alphaMM = 0.709f;
} else {
- alphaMM = 0.7213f / (float) (1 + 1.079f / m);
+ alphaMM = 0.7213f / (1 + 1.079f / m);
}
// For efficiency alpha is multiplied by m^2
@@ -258,7 +259,7 @@
// if size of sparse map excess the threshold convert the sparse map to
// dense register and switch to DENSE encoding
- if (sparseRegister.getSize() > encodingSwitchThreshold) {
+ if (sparseRegister.isSizeGreaterThan(encodingSwitchThreshold)) {
encoding = EncodingType.DENSE;
denseRegister = sparseToDenseRegister(sparseRegister);
sparseRegister = null;
@@ -386,7 +387,7 @@
}
private long linearCount(int mVal, long numZeros) {
- return (long) (Math.round(mVal * Math.log(mVal / ((double) numZeros))));
+ return (Math.round(mVal * Math.log(mVal / ((double) numZeros))));
}
// refer paper
@@ -459,7 +460,7 @@
sparseRegister.merge(hll.getHLLSparseRegister());
// if after merge the sparse switching threshold is exceeded then change
// to dense encoding
- if (sparseRegister.getSize() > encodingSwitchThreshold) {
+ if (sparseRegister.isSizeGreaterThan(encodingSwitchThreshold)) {
encoding = EncodingType.DENSE;
denseRegister = sparseToDenseRegister(sparseRegister);
sparseRegister = null;
@@ -481,7 +482,7 @@
/**
* Reduces the accuracy of the HLL provided to a smaller size
- * @param p0
+ * @param p0
* - new p size for the new HyperLogLog (smaller or no change)
* @return reduced (or same) HyperLogLog instance
*/
@@ -661,4 +662,9 @@
return o instanceof HyperLogLog;
}
+ @VisibleForTesting
+ public int getEncodingSwitchThreshold() {
+ return encodingSwitchThreshold;
+ }
+
}
diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
index e014fb5..e720ec8 100644
--- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
+++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.common.ndv.hll;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType;
import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
@@ -49,27 +50,27 @@
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
double delta4 = threshold * (4*size) / 100;
- assertEquals((double) size, (double) hll.count(), delta);
- assertEquals((double) size, (double) hll2.count(), delta);
+ assertEquals(size, hll.count(), delta);
+ assertEquals(size, hll2.count(), delta);
// merge
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
// merge should update registers and hence the count
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
// new merge
hll.merge(hll3);
- assertEquals((double) 3 * size, (double) hll.count(), delta);
+ assertEquals((double) 3 * size, hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
- // valid merge -- register set size gets bigger (also 4k items
+ // valid merge -- register set size gets bigger (also adds 4k items)
hll.merge(hll4);
- assertEquals((double) 4 * size, (double) hll.count(), delta4);
+ assertEquals((double) 4 * size, hll.count(), delta4);
assertEquals(EncodingType.DENSE, hll.getEncoding());
// invalid merge -- smaller register merge to bigger
@@ -95,27 +96,27 @@
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
double delta4 = threshold * (4*size) / 100;
- assertEquals((double) size, (double) hll.count(), delta);
- assertEquals((double) size, (double) hll2.count(), delta);
+ assertEquals(size, hll.count(), delta);
+ assertEquals(size, hll2.count(), delta);
// merge
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.SPARSE, hll.getEncoding());
// merge should update registers and hence the count
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.SPARSE, hll.getEncoding());
// new merge
hll.merge(hll3);
- assertEquals((double) 3 * size, (double) hll.count(), delta);
+ assertEquals((double) 3 * size, hll.count(), delta);
assertEquals(EncodingType.SPARSE, hll.getEncoding());
// valid merge -- register set size gets bigger & dense automatically
hll.merge(hll4);
- assertEquals((double) 4 * size, (double) hll.count(), delta4);
+ assertEquals((double) 4 * size, hll.count(), delta4);
assertEquals(EncodingType.DENSE, hll.getEncoding());
// invalid merge -- smaller register merge to bigger
@@ -140,27 +141,27 @@
}
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
- assertEquals((double) size, (double) hll.count(), delta);
- assertEquals((double) size, (double) hll2.count(), delta);
+ assertEquals(size, hll.count(), delta);
+ assertEquals(size, hll2.count(), delta);
// sparse-sparse merge
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.SPARSE, hll.getEncoding());
// merge should update registers and hence the count
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.SPARSE, hll.getEncoding());
// sparse-dense merge
hll.merge(hll3);
- assertEquals((double) 3 * size, (double) hll.count(), delta);
+ assertEquals((double) 3 * size, hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
// merge should convert hll2 to DENSE
hll2.merge(hll4);
- assertEquals((double) 2 * size, (double) hll2.count(), delta);
+ assertEquals((double) 2 * size, hll2.count(), delta);
assertEquals(EncodingType.DENSE, hll2.getEncoding());
// invalid merge -- smaller register merge to bigger
@@ -185,27 +186,27 @@
}
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
- assertEquals((double) size, (double) hll.count(), delta);
- assertEquals((double) size, (double) hll2.count(), delta);
+ assertEquals(size, hll.count(), delta);
+ assertEquals(size, hll2.count(), delta);
// sparse-sparse merge
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
// merge should update registers and hence the count
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
// sparse-dense merge
hll.merge(hll3);
- assertEquals((double) 3 * size, (double) hll.count(), delta);
+ assertEquals((double) 3 * size, hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
// merge should convert hll3 to DENSE
hll3.merge(hll4);
- assertEquals((double) 2 * size, (double) hll3.count(), delta);
+ assertEquals((double) 2 * size, hll3.count(), delta);
assertEquals(EncodingType.DENSE, hll3.getEncoding());
// invalid merge -- smaller register merge to bigger
@@ -231,27 +232,27 @@
}
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
- assertEquals((double) size, (double) hll.count(), delta);
- assertEquals((double) size, (double) hll2.count(), delta);
+ assertEquals(size, hll.count(), delta);
+ assertEquals(size, hll2.count(), delta);
// sparse-sparse merge
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.SPARSE, hll.getEncoding());
// merge should update registers and hence the count
hll.merge(hll2);
- assertEquals((double) 2 * size, (double) hll.count(), delta);
+ assertEquals((double) 2 * size, hll.count(), delta);
assertEquals(EncodingType.SPARSE, hll.getEncoding());
// sparse-sparse overload to dense
hll.merge(hll3);
- assertEquals((double) 3 * size, (double) hll.count(), delta);
+ assertEquals((double) 3 * size, hll.count(), delta);
assertEquals(EncodingType.DENSE, hll.getEncoding());
// merge should convert hll2 to DENSE
hll2.merge(hll4);
- assertEquals((double) 2 * size, (double) hll2.count(), delta);
+ assertEquals((double) 2 * size, hll2.count(), delta);
assertEquals(EncodingType.DENSE, hll2.getEncoding());
// invalid merge -- smaller register merge to bigger
@@ -268,7 +269,7 @@
}
double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
double delta = threshold * size / 100;
- assertEquals((double) size, (double) hll.count(), delta);
+ assertEquals(size, hll.count(), delta);
}
@Test
@@ -296,7 +297,7 @@
.squash(small.getNumRegisterIndexBits());
assertEquals(small.count(), mush.count(), 0);
double delta = Math.ceil(small.getStandardError()*size);
- assertEquals((double) size, (double) mush.count(), delta);
+ assertEquals(size, mush.count(), delta);
}
}
}
@@ -316,7 +317,7 @@
}
p14HLL.squash(p10HLL.getNumRegisterIndexBits());
- assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 100.0);
+ assertEquals(size, p14HLL.count(), longRangeTolerance * size / 100.0);
}
@Test
@@ -333,6 +334,26 @@
}
p14HLL.squash(p10HLL.getNumRegisterIndexBits());
- assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 100.0);
+ assertEquals(size, p14HLL.count(), longRangeTolerance * size / 100.0);
}
+
+ @Test
+ public void testAbletoRetainAccuracyUpToSwitchThreshold() {
+ int maxThreshold = HyperLogLog.builder().setSizeOptimized().build().getEncodingSwitchThreshold();
+ testRetainAccuracy(70);
+ testRetainAccuracy(maxThreshold / 2);
+ testRetainAccuracy(maxThreshold);
+ }
+
+ private void testRetainAccuracy(int numElements) {
+ HyperLogLog h = HyperLogLog.builder().setSizeOptimized().build();
+ assertTrue(numElements <= h.getEncodingSwitchThreshold());
+ for (int ia = 0; ia <= 10; ia++) {
+ for (int i = 1; i <= numElements; i++) {
+ h.addLong(i);
+ }
+ }
+ assertEquals(numElements, h.estimateNumDistinctValues());
+ }
+
}