diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/HyperLogLogBench.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/HyperLogLogBench.java new file mode 100644 index 0000000..1a9ebb1 --- /dev/null +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/serde/HyperLogLogBench.java @@ -0,0 +1,134 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hive.benchmark.serde; + +import java.util.concurrent.TimeUnit; + +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * Micro-benchmarks for the size-optimized HyperLogLog around its sparse/dense encoding switch. + * Run with: java -cp target/benchmarks.jar org.apache.hive.benchmark.serde.HyperLogLogBench + */ +@State(Scope.Benchmark) +public class HyperLogLogBench { + public static final int DEFAULT_ITER_TIME = 1000000; // target number of addInt() calls per bench() invocation + + @BenchmarkMode(Mode.AverageTime) + @Fork(1) + @State(Scope.Thread) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public abstract static class Abstract { + + @Setup + public abstract void setup(); + + @Benchmark + @Warmup(iterations = 3, time = 2, timeUnit = TimeUnit.MILLISECONDS) + @Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.MILLISECONDS) + public void bench() { + // overridden by the concrete benchmark classes below + } + } + + public abstract static class SizeOptimizedSparseStressN extends Abstract { + + private HyperLogLog hll; + private final int stressN; // number of distinct values fed to the sketch per round + private final int numIterations; // rounds needed to reach ~DEFAULT_ITER_TIME adds in total + + public SizeOptimizedSparseStressN(int stressN) { + this.stressN = stressN; + numIterations = DEFAULT_ITER_TIME / stressN; + } + + @Override + public void setup() { + hll = HyperLogLog.builder().setSizeOptimized().build(); + } + + @Override + public void bench() { + for (int i = 0; i < numIterations; i++) { + for (int j = 0; j < stressN; j++) { + hll.addInt(j); + } + } + } + + } + + public static class SizeOptimizedSparseStress30 extends SizeOptimizedSparseStressN { + public SizeOptimizedSparseStress30() { + super(30); + } + } + + public static class SizeOptimizedSparseStress70 extends SizeOptimizedSparseStressN { + public SizeOptimizedSparseStress70() { + super(70); + } + } + + public static class SizeOptimizedSparseStressTminus10 extends SizeOptimizedSparseStressN { + public SizeOptimizedSparseStressTminus10() { + super(HyperLogLog.builder().setSizeOptimized().build().getEncodingSwitchThreshold() - 10); + } + } + + public static class SizeOptimizedSparseStressTminus1 extends SizeOptimizedSparseStressN { + public SizeOptimizedSparseStressTminus1() {
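+ // Clarifying note (added comment, not in the original patch): getEncodingSwitchThreshold() is where
+ // the size-optimized sketch flips from sparse to dense register encoding, so T-1 below is the largest
+ // cardinality that keeps the sketch fully sparse; the 2T/8T classes below force the dense path.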
super(HyperLogLog.builder().setSizeOptimized().build().getEncodingSwitchThreshold() - 1); + } + } + + public static class SizeOptimizedSparseStressT extends SizeOptimizedSparseStressN { + public SizeOptimizedSparseStressT() { + super(HyperLogLog.builder().setSizeOptimized().build().getEncodingSwitchThreshold()); + } + } + + public static class SizeOptimizedDenseStress2T extends SizeOptimizedSparseStressN { + public SizeOptimizedDenseStress2T() { + super(2 * HyperLogLog.builder().setSizeOptimized().build().getEncodingSwitchThreshold()); + } + } + + public static class SizeOptimizedDenseStress8T extends SizeOptimizedSparseStressN { + public SizeOptimizedDenseStress8T() { + super(8 * HyperLogLog.builder().setSizeOptimized().build().getEncodingSwitchThreshold()); + } + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder().include(".*" + HyperLogLogBench.class.getSimpleName() + ".*").build(); + new Runner(opt).run(); + } +}
diff --git ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out index cdd934c..a51f258 100644 --- ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out +++ ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -237,7 +237,7 @@ min -64 max 62 num_nulls 3115 -distinct_count 130 +distinct_count 127 avg_col_len max_col_len num_trues
diff --git ql/src/test/results/clientpositive/groupby_join_pushdown.q.out ql/src/test/results/clientpositive/groupby_join_pushdown.q.out index 2138eae..14fc0b7 100644 --- ql/src/test/results/clientpositive/groupby_join_pushdown.q.out +++ ql/src/test/results/clientpositive/groupby_join_pushdown.q.out
@@ -889,13 +889,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint) Execution mode: vectorized Reduce Operator Tree:
@@ -904,7 +904,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -921,7 +921,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint) TableScan Reduce Output Operator
@@ -929,7 +929,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map:
@@ -938,14 +938,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 132 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col2 (type: tinyint), _col1 (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 132 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 132 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -970,20 +970,20 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1038,13 +1038,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: vectorized Reduce Operator Tree:
@@ -1053,7 +1053,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1070,7 +1070,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) TableScan Reduce Output Operator
@@ -1078,7 +1078,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map:
@@ -1087,14 +1087,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col1 - Statistics: Num rows: 132 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 132 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 132 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1119,20 +1119,20 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1187,13 +1187,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree:
@@ -1202,7 +1202,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1219,7 +1219,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator
@@ -1227,7 +1227,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator
@@ -1237,14 +1237,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col1, _col3 - Statistics: Num rows: 132 Data size: 2112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (_col1 * _col3) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 132 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 132 Data size: 1056 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 776 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1270,13 +1270,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree:
@@ -1285,7 +1285,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1340,13 +1340,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree:
@@ -1355,7 +1355,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1372,7 +1372,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator
@@ -1380,7 +1380,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator
@@ -1390,14 +1390,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 132 Data size: 2512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 132 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 132 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1423,13 +1423,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree:
@@ -1438,7 +1438,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1493,13 +1493,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree:
@@ -1508,7 +1508,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1525,7 +1525,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) TableScan Reduce Output Operator
@@ -1533,7 +1533,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reduce Operator Tree: Join Operator
@@ -1543,14 +1543,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 132 Data size: 2512 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1848 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 132 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 132 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 97 Data size: 1072 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1576,13 +1576,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized Reduce Operator Tree:
@@ -1591,7 +1591,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 96 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1667,14 +1667,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1161499 Data size: 13900620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1188936 Data size: 14229864 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: tinyint), _col2 (type: tinyint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17161 Data size: 274216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 261808 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1691,7 +1691,7 @@ null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint) - Statistics: Num rows: 17161 Data size: 274216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 261808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized Reduce Operator Tree:
@@ -1700,14 +1700,14 @@ keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17161 Data size: 274216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 261808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 17161 Data size: 205752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 196440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17161 Data size: 205752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16384 Data size: 196440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1793,14 +1793,14 @@ 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 647260 Data size: 7739232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 885725 Data size: 10600812 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: tinyint), _col2 (type: tinyint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17161 Data size: 274088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 147272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table:
@@ -1817,7 +1817,7 @@ null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint) - Statistics: Num rows: 17161 Data size: 274088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 147272 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized Reduce Operator Tree:
@@ -1826,14 +1826,14 @@ keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 17161 Data size: 274088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 147272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: tinyint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 17161 Data size: 205688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 110500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 17161 Data size: 205688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9216 Data size: 110500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out index 1a05333..a81a79f 100644 --- ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out +++ ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out @@ -192,7 +192,7 @@ keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 221 Data size: 1768 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 220 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out index 85d2e19..06cd5c2 100644 --- ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out +++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out @@ -1253,10 +1253,10 @@ 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1336,10 +1336,10 @@ outputColumnNames: _col0, _col1 input vertices: 0 Reducer 2 - Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num
rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1464,14 +1464,14 @@ 0 _col1 (type: double) 1 _col1 (type: double) outputColumnNames: _col0, _col2 - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1580,14 +1580,14 @@ 0 _col1 (type: double) 1 _col1 (type: double) outputColumnNames: _col0, _col2 - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 200 Data size: 36400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 191 Data size: 34762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out index 36bd120..925b400 100644 --- ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out +++ ql/src/test/results/clientpositive/llap/explainanalyze_2.q.out @@ -627,7 +627,7 @@ Stage-1 Map 1 llap File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_25] (rows=401/480 width=95) + Merge Join Operator [MERGEJOIN_25] (rows=382/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] @@ -668,48 +668,41 @@ Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap + Reducer 2 llap File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_46] (rows=633/1166 width=95) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0","_col1"] - <-Map 5 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_47] (rows=604/1166 width=95) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] 
(rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=242/242 width=4) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col0"] - Filter Operator [FIL_24] (rows=242/242 width=4) - predicate:key is not null - TableScan [TS_6] (rows=242/242 width=4) - default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95) - Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=242/242 width=95) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242/242 width=95) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=95) - default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=242/242 width=91) - Output:["_col0"] - Filter Operator [FIL_23] (rows=242/242 width=91) - predicate:value is not null - TableScan [TS_3] (rows=242/242 width=91) - default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] + Filter Operator [FIL_24] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select s1.key as key, s1.value as value from tab_n6 s1 join tab2_n3 s3 on s1.key=s3.key PREHOOK: type: QUERY @@ -749,7 +742,7 @@ Stage-1 Map 1 llap File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_25] (rows=401/480 width=95) + Merge Join Operator [MERGEJOIN_25] (rows=382/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] @@ -798,48 +791,41 @@ Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 3 llap + Reducer 2 llap File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_46] (rows=633/1166 width=95) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0","_col1"] - <-Map 5 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_47] (rows=604/1166 width=95) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_23] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab2_n3,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_22] (rows=242/242 width=95) + predicate:(key is not null and 
value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 4 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=242/242 width=4) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col0"] - Filter Operator [FIL_24] (rows=242/242 width=4) - predicate:key is not null - TableScan [TS_6] (rows=242/242 width=4) - default@tab2_n3,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_45] (rows=382/480 width=95) - Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=242/242 width=95) - Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242/242 width=95) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=95) - default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=242/242 width=91) - Output:["_col0"] - Filter Operator [FIL_23] (rows=242/242 width=91) - predicate:value is not null - TableScan [TS_3] (rows=242/242 width=91) - default@tab2_n3,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] + Filter Operator [FIL_24] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab2_n3,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab_n6 s1 join tab_n6 s3 on s1.key=s3.key UNION ALL @@ -901,7 +887,7 @@ Output:["_col0"],aggregations:["count()"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_22] - Merge Join Operator [MERGEJOIN_60] (rows=1061/1646 width=8) + Merge Join Operator [MERGEJOIN_60] (rows=1029/1646 width=8) Conds:Union 2._col0=RS_19._col0(Inner) <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_19] @@ -916,7 +902,7 @@ <-Map 1 [CONTAINS] llap Reduce Output Operator [RS_70] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_67] (rows=401/480 width=4) + Merge Join Operator [MERGEJOIN_67] (rows=382/480 width=4) Conds:SEL_65._col0=SEL_5._col0(Inner),Output:["_col0"] <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] @@ -985,25 +971,24 @@ Plan optimized by CBO. 
Vertex dependency in root stage -Map 9 <- Union 4 (CONTAINS) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) -Reducer 5 <- Map 10 (SIMPLE_EDGE), Union 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Map 8 <- Union 3 (CONTAINS) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 3 (CONTAINS) +Reducer 4 <- Map 9 (SIMPLE_EDGE), Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 llap + Reducer 5 llap File Output Operator [FS_31] Group By Operator [GBY_29] (rows=1/1 width=8) Output:["_col0"],aggregations:["count()"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_28] - Merge Join Operator [MERGEJOIN_81] (rows=1443/3768 width=8) - Conds:Union 4._col0=RS_25._col0(Inner) - <-Map 10 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_82] (rows=1396/3768 width=8) + Conds:Union 3._col0=RS_25._col0(Inner) + <-Map 9 [SIMPLE_EDGE] llap SHUFFLE [RS_25] PartitionCols:_col0 Select Operator [SEL_23] (rows=500/500 width=4) @@ -1012,53 +997,47 @@ predicate:key is not null TableScan [TS_21] (rows=500/500 width=4) default@tab_part_n7,b_n10,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Union 4 [SIMPLE_EDGE] - <-Map 9 [CONTAINS] llap - Reduce Output Operator [RS_89] + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_90] PartitionCols:_col0 - Select Operator [SEL_87] (rows=242/242 width=4) + Select Operator [SEL_88] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_86] (rows=242/242 width=4) + Filter Operator [FIL_87] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_85] (rows=242/242 width=4) + TableScan [TS_86] (rows=242/242 width=4) Output:["key"] - <-Reducer 3 [CONTAINS] llap - Reduce Output Operator [RS_84] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_85] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_82] (rows=633/1166 width=4) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col0"] - <-Map 8 [SIMPLE_EDGE] llap + Merge Join Operator [MERGEJOIN_83] (rows=604/1166 width=4) + Conds:RS_12._col1=RS_13._col0(Inner),Output:["_col0"] + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_79] (rows=382/480 width=95) + Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] + <-Select Operator [SEL_5] (rows=242/242 width=4) + Output:["_col0"] + Filter Operator [FIL_43] (rows=242/242 width=4) + predicate:key is not null + TableScan [TS_3] (rows=242/242 width=4) + default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) + Output:["_col0","_col1"] + Filter Operator [FIL_42] (rows=242/242 width=95) + predicate:(key is not null and value is not null) + TableScan [TS_0] (rows=242/242 width=95) + default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 7 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_8] (rows=242/242 width=4) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col0"] - Filter Operator [FIL_44] (rows=242/242 width=4) - predicate:key is not null - TableScan [TS_6] (rows=242/242 width=4) - default@tab_n6,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_79] (rows=382/480 width=4) - Conds:RS_9._col1=RS_10._col0(Inner),Output:["_col0"] - <-Map 1 
[SIMPLE_EDGE] llap - SHUFFLE [RS_9] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=242/242 width=95) - Output:["_col0","_col1"] - Filter Operator [FIL_42] (rows=242/242 width=95) - predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=95) - default@tab_n6,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_10] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=242/242 width=91) - Output:["_col0"] - Filter Operator [FIL_43] (rows=242/242 width=91) - predicate:value is not null - TableScan [TS_3] (rows=242/242 width=91) - default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] + Filter Operator [FIL_44] (rows=242/242 width=91) + predicate:value is not null + TableScan [TS_6] (rows=242/242 width=91) + default@tab_n6,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: CREATE TABLE a_n14(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/llap/limit_pushdown.q.out ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index 63e524d..9a4ceee 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ -577,7 +577,7 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE @@ -681,7 +681,7 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE @@ -789,7 +789,7 @@ keys: _col2 (type: tinyint) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out index 48d75cd..74f137c 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out @@ -628,12 +628,12 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) Reducer 3 @@ -642,7 +642,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE @@ -753,12 +753,12 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) Reducer 3 @@ -767,7 +767,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE @@ -882,12 +882,12 @@ keys: _col2 (type: tinyint) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 @@ -896,7 +896,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index 208646b..37a9819 100644 --- ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -582,7 +582,7 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -687,7 +687,7 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 @@ -796,7 +796,7 @@ keys: _col2 (type: tinyint) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index e74bc44..5b14c91 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -289,10 +289,10 @@ vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) - minReductionHashAggr: 0.915 + minReductionHashAggr: 0.918 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -302,7 +302,7 @@ className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -335,7 +335,7 @@ keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -344,7 +344,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -362,7 +362,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -495,10 +495,10 @@ vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ss_ticket_number (type: int) - minReductionHashAggr: 0.915 + minReductionHashAggr: 0.918 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -508,7 +508,7 @@ className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num 
rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -540,7 +540,7 @@ keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0) Group By Vectorization: @@ -554,7 +554,7 @@ keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 85 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 @@ -562,7 +562,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -571,7 +571,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -588,13 +588,13 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 82 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out index bc1eba8..d9f6d7e 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join.q.out @@ -64,7 +64,7 @@ 1 _col0 (type: tinyint) input vertices: 1 Map 4 - Statistics: Num rows: 1302989 Data size: 10423912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1564475 Data size: 12515800 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/llap/vectorization_div0.q.out ql/src/test/results/clientpositive/llap/vectorization_div0.q.out index e1218d6..3dda1f1 100644 --- ql/src/test/results/clientpositive/llap/vectorization_div0.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_div0.q.out @@ -739,12 +739,12 @@ native:
true predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 2:int, val 500000000), FilterDoubleColGreaterDoubleScalar(col 5:double, val 1.0E9), FilterLongColEqualLongScalar(col 0:tinyint, val 0)) predicate: ((cint > 500000000) or (cdouble > 1.0E9D) or (ctinyint = 0Y)) (type: boolean) - Statistics: Num rows: 3378 Data size: 60552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 60576 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++ keys: cint (type: int), cbigint (type: bigint), ctinyint (type: tinyint), (UDFToDouble(cint) / UDFToDouble((cint - 528534767))) (type: double), (UDFToDouble(cbigint) / UDFToDouble((cbigint - 1018195815L))) (type: double), (UDFToDouble(ctinyint) / UDFToDouble(ctinyint)) (type: double), (cint % (cint - 528534767)) (type: int), (cbigint % (cbigint - 1018195815L)) (type: bigint), (ctinyint % ctinyint) (type: tinyint) null sort order: zzzzzzzzz - Statistics: Num rows: 3378 Data size: 60552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 60576 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -758,7 +758,7 @@ native: true projectedOutputColumnNums: [2, 3, 0, 17, 19, 21, 18, 24, 14] selectExpressions: DoubleColDivideDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 13:double, CastLongToDouble(col 14:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 14:int) -> 15:double) -> 17:double, DoubleColDivideDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 13:double, CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 14:bigint) -> 15:double) -> 19:double, DoubleColDivideDoubleColumn(col 13:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 21:double, LongColModuloLongColumn(col 2:int, col 14:int)(children: LongColSubtractLongScalar(col 2:int, val 528534767) -> 14:int) -> 18:int, LongColModuloLongColumn(col 3:bigint, col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 1018195815) -> 14:bigint) -> 24:bigint, LongColModuloLongColumn(col 0:tinyint, col 0:tinyint) -> 14:tinyint - Statistics: Num rows: 3378 Data size: 161792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 161872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: int), _col7 (type: bigint), _col8 (type: tinyint) null sort order: zzzzzzzzz @@ -767,7 +767,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3378 Data size: 161792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 161872 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -796,7 +796,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 3378 Data 
size: 161792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3380 Data size: 161872 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Limit Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out index a655c16..8f8dcbe 100644 --- ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out @@ -188,16 +188,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -224,14 +224,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -530,16 +530,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: llap LLAP IO: all inputs (cache only) @@ -561,14 +561,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -872,16 +872,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -908,14 +908,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1262,16 +1262,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -1293,14 +1293,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 36276e1..14f58a6 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -335,10 +335,10 @@ vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -350,7 +350,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
valueColumns: 1:double, 2:bigint - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -398,12 +398,12 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: tinyint), (_col1 / _col2) (type: double) null sort order: zz - Statistics: Num rows: 131 Data size: 2360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2304 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -417,7 +417,7 @@ native: true projectedOutputColumnNums: [0, 4] selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 4:double - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: double) null sort order: zz @@ -427,7 +427,7 @@ keyColumns: 0:tinyint, 4:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -452,7 +452,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -565,10 +565,10 @@ vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: ctinyint (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -580,7 +580,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumns: 0:tinyint - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs @@ -626,7 +626,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 
Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: @@ -814,12 +814,12 @@ keys: _col0 (type: tinyint) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: tinyint), _col1 (type: bigint) null sort order: zz - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE top n: 20 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -834,7 +834,7 @@ keyColumns: 0:tinyint, 1:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 131 Data size: 1312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1280 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -859,7 +859,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 131 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index ca71b6e..e7dba7f 100644 --- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -477,10 +477,10 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out index e946e94..cf14f24 100644 --- ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out @@ -55,7 +55,11 @@ outputColumnNames: _col3 input vertices: 1 Map 4 +<<<<<<< HEAD + Statistics: Num rows: 1433691 Data size: 11455656 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 782315 Data size: 6244648 Basic stats: COMPLETE Column stats: COMPLETE +>>>>>>> apache/master Group By Operator aggregations: sum(_col3) minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index 152f4f2..762dcbf 100644 --- 
ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -158,16 +158,16 @@ Group By Operator aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) keys: _col0 (type: tinyint) - minReductionHashAggr: 0.9893392 + minReductionHashAggr: 0.9895833 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -194,14 +194,14 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 131 Data size: 7732 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 4588 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out index 1cdecf1..68a672c 100644 --- ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out +++ ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out @@ -242,7 +242,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -253,7 +253,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: 
COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized @@ -276,11 +276,11 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE @@ -373,7 +373,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -384,7 +384,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -405,7 +405,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -533,7 +533,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/vector_left_outer_join.q.out ql/src/test/results/clientpositive/vector_left_outer_join.q.out index 7f0c8c3..baef4ce 100644 --- ql/src/test/results/clientpositive/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/vector_left_outer_join.q.out @@ -97,7 +97,11 @@ keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) +<<<<<<< HEAD + Statistics: Num rows: 1564475 Data size: 12515800 Basic stats: COMPLETE Column stats: COMPLETE +======= Statistics: Num rows: 1302989 Data size: 10423912 Basic stats: COMPLETE Column stats: COMPLETE +>>>>>>> apache/master Group By Operator aggregations: count() minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index c121d9d..e6d0569 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -267,7 +267,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, 
_col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -278,7 +278,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -306,11 +306,11 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 131 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -334,7 +334,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -360,7 +360,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE @@ -454,7 +454,7 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) null sort order: z @@ -465,7 +465,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -492,7 +492,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 396 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE @@ -627,7 +627,7 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -651,7 +651,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Execution mode: vectorized Map Vectorization: @@ -677,7 +677,7 @@ Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 131 Data size: 1444 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out index 5aecbe8..4c209bb 100644 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out @@ -338,10 +338,10 @@ keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 131 Data size: 26596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 128 Data size: 25988 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git standalone-metastore/metastore-server/pom.xml standalone-metastore/metastore-server/pom.xml index 67ebdaf..b29538d 100644 --- standalone-metastore/metastore-server/pom.xml +++ standalone-metastore/metastore-server/pom.xml @@ -234,6 +234,10 @@ com.cronutils cron-utils + + it.unimi.dsi + fastutil + com.microsoft.sqlserver diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLConstants.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLConstants.java index 3a1d076..7a726aa 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLConstants.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLConstants.java @@ -26,9 +26,6 @@ public static final int MIN_P_VALUE = 4; public static final int MAX_P_VALUE = 16; - // number of entries to store before being merged to sparse map - public static final int 
TEMP_LIST_DEFAULT_SIZE = 1024; - // constants for SPARSE encoding public static final int P_PRIME_VALUE = 25; public static final int Q_PRIME_VALUE = 6; diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java index d5ac54a..2813458 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java @@ -20,16 +20,13 @@ import java.util.Map; import java.util.Map.Entry; -import java.util.TreeMap; + +import it.unimi.dsi.fastutil.ints.Int2ByteMap; +import it.unimi.dsi.fastutil.ints.Int2ByteOpenHashMap; public class HLLSparseRegister implements HLLRegister { - private TreeMap sparseMap; - - // for a better insertion performance values are added to temporary unsorted - // list which will be merged to sparse map after a threshold - private int[] tempList; - private int tempListIdx; + private Int2ByteMap sparseMap; // number of register bits private final int p; @@ -47,9 +44,7 @@ public HLLSparseRegister(int p, int pp, int qp) { this.p = p; - this.sparseMap = new TreeMap<>(); - this.tempList = new int[HLLConstants.TEMP_LIST_DEFAULT_SIZE]; - this.tempListIdx = 0; + this.sparseMap = new Int2ByteOpenHashMap(); this.pPrime = pp; this.qPrime = qp; this.mask = ((1 << pPrime) - 1) ^ ((1 << p) - 1); @@ -60,65 +55,44 @@ public boolean add(long hashcode) { boolean updated = false; - // fill the temp list before merging to sparse map - if (tempListIdx < tempList.length) { - int encodedHash = encodeHash(hashcode); - tempList[tempListIdx++] = encodedHash; - updated = true; + int encodedHash = encodeHash(hashcode); + + // int encodedHash = tempList[i]; + int key = encodedHash & pPrimeMask; + byte value = (byte) (encodedHash >>> pPrime); + byte nr = 0; + // if MSB is set to 1 then next qPrime MSB bits contains the value of + // number of zeroes. + // if MSB is set to 0 then number of zeroes is contained within pPrime - p + // bits. + if (encodedHash < 0) { + nr = (byte) (value & qPrimeMask); } else { - updated = mergeTempListToSparseMap(); + nr = (byte) (Integer.numberOfTrailingZeros(encodedHash >>> p) + 1); } + updated = set(key, nr); return updated; } /** - * Adds temp list to sparse map. The key for sparse map entry is the register - * index determined by pPrime and value is the number of trailing zeroes. - * @return - */ - private boolean mergeTempListToSparseMap() { - boolean updated = false; - for (int i = 0; i < tempListIdx; i++) { - int encodedHash = tempList[i]; - int key = encodedHash & pPrimeMask; - byte value = (byte) (encodedHash >>> pPrime); - byte nr = 0; - // if MSB is set to 1 then next qPrime MSB bits contains the value of - // number of zeroes. - // if MSB is set to 0 then number of zeroes is contained within pPrime - p - // bits. - if (encodedHash < 0) { - nr = (byte) (value & qPrimeMask); - } else { - nr = (byte) (Integer.numberOfTrailingZeros(encodedHash >>> p) + 1); - } - updated = set(key, nr); - } - - // reset temp list index - tempListIdx = 0; - return updated; - } - - /** *
    * Input: 64 bit hashcode
-   * 
+   *
    * |---------w-------------| |------------p'----------|
    * 10101101.......1010101010 10101010101 01010101010101
    *                                       |------p-----|
-   *                                       
+   *
    * Output: 32 bit int
-   * 
+   *
    * |b| |-q'-|  |------------p'----------|
    *  1  010101  01010101010 10101010101010
    *                         |------p-----|
-   *                    
-   * 
+   *
+   *
    * The default values of p', q' and b are 25, 6, 1 (total 32 bits) respectively.
    * This function will return an int encoded in the following format
-   * 
+   *
    * p  - LSB p bits represent the register index
    * p' - LSB p' bits are used for increased accuracy in estimation
    * q' - q' bits after p' are left as such from the hashcode if b = 0 else
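[To make the bit layout above concrete, the following is a minimal sketch of how a 32-bit sparse-encoded value decomposes back into a register key and a run length, mirroring the masks used in add() earlier in this file. The constants p = 14, p' = 25, q' = 6 are the documented defaults, and the method name is illustrative, not part of the patch:

    // Illustrative decoder for the sparse encoding described above.
    static int[] decodeSparse(int encodedHash) {
      final int p = 14, pPrime = 25, qPrime = 6;
      final int pPrimeMask = (1 << pPrime) - 1;
      final int qPrimeMask = (1 << qPrime) - 1;
      int key = encodedHash & pPrimeMask;   // low p' bits: extended register index
      byte nr;
      if (encodedHash < 0) {
        // b = 1: the run length was stored explicitly in the q' bits after p'
        nr = (byte) ((encodedHash >>> pPrime) & qPrimeMask);
      } else {
        // b = 0: the run length is recomputed from the hash bits above p
        nr = (byte) (Integer.numberOfTrailingZeros(encodedHash >>> p) + 1);
      }
      return new int[] { key, nr };
    }
]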
@@ -148,8 +122,8 @@
     }
   }
 
-  public int getSize() {
-    return sparseMap.size() + tempListIdx;
+  public boolean isSizeGreaterThan(int s) {
+    return sparseMap.size() > s;
   }
 
   public void merge(HLLRegister hllRegister) {
@@ -177,14 +151,11 @@
     return false;
   }
 
-  public TreeMap<Integer, Byte> getSparseMap() {
+  public Map<Integer, Byte> getSparseMap() {
     return getMergedSparseMap();
   }
 
-  private TreeMap<Integer, Byte> getMergedSparseMap() {
-    if (tempListIdx != 0) {
-      mergeTempListToSparseMap();
-    }
+  private Map<Integer, Byte> getMergedSparseMap() {
     return sparseMap;
   }
 
@@ -195,7 +166,7 @@
       byte lr = entry.getValue(); // this can be a max of 65, never > 127
       if (lr != 0) {
         // should be a no-op for sparse
-        dest.add((long) ((1 << (p + lr - 1)) | idx));
+        dest.add((1L << (p + lr - 1)) | idx);
       }
     }
   }
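[The switch from (long) ((1 << (p + lr - 1)) | idx) to (1L << (p + lr - 1)) | idx is a correctness fix rather than a style cleanup: Java int shifts are taken modulo 32, so once p + lr - 1 reaches 31 the old expression wrapped around before the widening cast. A quick illustration with made-up values:

    int p = 14, lr = 20, idx = 5;                     // p + lr - 1 = 33
    long wrong = (long) ((1 << (p + lr - 1)) | idx);  // 1 << 33 wraps to 1 << 1: result 7
    long right = (1L << (p + lr - 1)) | idx;          // 64-bit shift: result 8589934597

Since lr can reach 65 per the comment above, the wrapped variant corrupted exactly the high-run-length registers that matter most to the estimate.]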
@@ -231,15 +202,8 @@
       return false;
     }
     HLLSparseRegister other = (HLLSparseRegister) obj;
-    boolean result = p == other.p && pPrime == other.pPrime && qPrime == other.qPrime
-        && tempListIdx == other.tempListIdx;
+    boolean result = p == other.p && pPrime == other.pPrime && qPrime == other.qPrime;
     if (result) {
-      for (int i = 0; i < tempListIdx; i++) {
-        if (tempList[i] != other.tempList[i]) {
-          return false;
-        }
-      }
-
       result = result && sparseMap.equals(other.sparseMap);
     }
     return result;
@@ -251,9 +215,6 @@
     hashcode += 31 * p;
     hashcode += 31 * pPrime;
     hashcode += 31 * qPrime;
-    for (int i = 0; i < tempListIdx; i++) {
-      hashcode += 31 * tempList[tempListIdx];
-    }
     hashcode += sparseMap.hashCode();
     return hashcode;
   }
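[Worth noting: the hashCode() loop removed here was itself defective. It read tempList[tempListIdx], the first unused slot, on every iteration instead of tempList[i], so every temp entry contributed the same value to the hash. Dropping the temp list removes the bug along with the code; for reference, a corrected form of the deleted loop would have been (illustrative only):

    for (int i = 0; i < tempListIdx; i++) {
      hashcode += 31 * tempList[i];   // the deleted code indexed tempList[tempListIdx] here
    }
]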
diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index 91a6865..edf587f 100644
--- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -20,7 +20,6 @@
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.Map;
 import java.util.TreeMap;
@@ -30,17 +29,19 @@
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hive.common.util.Murmur3;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * 
  * This is an implementation of the following variants of hyperloglog (HLL)
- * algorithm 
+ * algorithm
  * Original  - Original HLL algorithm from Flajolet et. al from
  *             http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
  * HLLNoBias - Google's implementation of bias correction based on lookup table
  *             http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
  * HLL++     - Google's implementation of HLL++ algorithm that uses SPARSE registers
  *             http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
- * 
+ *
  * Following are the constructor parameters that determines which algorithm is
  * used
  * numRegisterIndexBits - number of LSB hashcode bits to be used as register index.
@@ -194,7 +195,7 @@
     } else if (hashBits <= 64) {
       alphaMM = 0.709f;
     } else {
-      alphaMM = 0.7213f / (float) (1 + 1.079f / m);
+      alphaMM = 0.7213f / (1 + 1.079f / m);
     }
 
     // For efficiency alpha is multiplied by m^2
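[For orientation, with the default p = 14 this branch computes alpha for m = 2^14 = 16384 registers. A back-of-the-envelope check, not code from the patch:

    int m = 1 << 14;                           // 16384 registers for p = 14
    float alpha = 0.7213f / (1 + 1.079f / m);  // ~= 0.72125
    double alphaMM = alpha * m * (double) m;   // alpha * m^2, folded in once for efficiency
]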
@@ -258,7 +259,7 @@
 
       // if size of sparse map excess the threshold convert the sparse map to
       // dense register and switch to DENSE encoding
-      if (sparseRegister.getSize() > encodingSwitchThreshold) {
+      if (sparseRegister.isSizeGreaterThan(encodingSwitchThreshold)) {
         encoding = EncodingType.DENSE;
         denseRegister = sparseToDenseRegister(sparseRegister);
         sparseRegister = null;
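[Both call sites that previously compared getSize() against the threshold now go through isSizeGreaterThan(), which narrows the register's public surface to the one question its owner actually asks. The switch itself keeps the same shape here and in the merge path below; schematically, using only names from this patch:

    // Once the sparse map outgrows the footprint of a byte-per-register
    // array, flip to DENSE exactly once and drop the sparse state.
    if (sparseRegister.isSizeGreaterThan(encodingSwitchThreshold)) {
      encoding = EncodingType.DENSE;
      denseRegister = sparseToDenseRegister(sparseRegister);
      sparseRegister = null;
    }
]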
@@ -386,7 +387,7 @@
   }
 
   private long linearCount(int mVal, long numZeros) {
-    return (long) (Math.round(mVal * Math.log(mVal / ((double) numZeros))));
+    return (Math.round(mVal * Math.log(mVal / ((double) numZeros))));
   }
 
   // refer paper
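[linearCount() is the standard linear-counting estimator m * ln(m / V), with V the number of still-zero registers. A worked example with illustrative numbers:

    int m = 16384;           // total registers
    long numZeros = 16000;   // registers still at zero
    long estimate = Math.round(m * Math.log(m / (double) numZeros));
    // ln(16384 / 16000) ~= 0.0237, so estimate ~= 389 distinct values
]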
@@ -459,7 +460,7 @@
       sparseRegister.merge(hll.getHLLSparseRegister());
       // if after merge the sparse switching threshold is exceeded then change
       // to dense encoding
-      if (sparseRegister.getSize() > encodingSwitchThreshold) {
+      if (sparseRegister.isSizeGreaterThan(encodingSwitchThreshold)) {
         encoding = EncodingType.DENSE;
         denseRegister = sparseToDenseRegister(sparseRegister);
         sparseRegister = null;
@@ -481,7 +482,7 @@
 
   /**
    * Reduces the accuracy of the HLL provided to a smaller size
-   * @param p0 
+   * @param p0
    *         - new p size for the new HyperLogLog (smaller or no change)
    * @return reduced (or same) HyperLogLog instance
    */
@@ -661,4 +662,9 @@
     return o instanceof HyperLogLog;
   }
 
+  @VisibleForTesting
+  public int getEncodingSwitchThreshold() {
+    return encodingSwitchThreshold;
+  }
+
 }
diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
index aeba2e9..703129b 100644
--- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
+++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLogUtils.java
@@ -25,8 +25,6 @@
 import java.io.OutputStream;
 import java.util.Arrays;
 import java.util.Map;
-import java.util.TreeMap;
-
 import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType;
 
 /**
@@ -38,24 +36,24 @@
 
   /**
    * HyperLogLog is serialized using the following format
-   * 
+   *
   * <pre>
-   * |-4 byte-|------varlong----|varint (optional)|----------|  
+   * |-4 byte-|------varlong----|varint (optional)|----------|
    * ---------------------------------------------------------
    * | header | estimated-count | register-length | register |
    * ---------------------------------------------------------
-   * 
+   *
    * 4 byte header is encoded like below
    * 3 bytes - HLL magic string to identify serialized stream
    * 4 bits  - p (number of bits to be used as register index)
    * 1       - spare bit (not used)
    * 3 bits  - encoding (000 - sparse, 001..110 - n bit packing, 111 - no bit packing)
-   * 
+   *
    * Followed by header are 3 fields that are required for reconstruction
    * of hyperloglog
    * Estimated count - variable length long to store last computed estimated count.
    *                   This is just for quick lookup without deserializing registers
-   * Register length - number of entries in the register (required only for 
+   * Register length - number of entries in the register (required only
    *                   for sparse representation. For bit-packing, the register
    *                   length can be found from p)
   * </pre>
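[Given the header layout documented above, the byte that follows the 3-byte magic packs p and the encoding together. A minimal sketch of that packing; the exact bit positions are an assumption drawn from the comment (4 bits of p, one spare bit, 3 bits of encoding), and the authoritative logic remains in this class's serialization code:

    // Illustrative only: pack p (4 bits) and the encoding tag (3 bits)
    // into one header byte, leaving the documented spare bit clear.
    static byte packHeaderByte(int p, int encodingTag) {
      return (byte) (((p & 0xF) << 4) | (encodingTag & 0x7));
    }
]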
@@ -104,7 +102,7 @@
       byte[] register = hll.getHLLDenseRegister().getRegister();
       bitpackHLLRegister(out, register, bitWidth);
     } else if (enc.equals(EncodingType.SPARSE)) {
-      TreeMap<Integer, Byte> sparseMap = hll.getHLLSparseRegister().getSparseMap();
+      Map<Integer, Byte> sparseMap = hll.getHLLSparseRegister().getSparseMap();
 
       // write the number of elements in sparse map (required for
       // reconstruction)
diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
index e014fb5..e720ec8 100644
--- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
+++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/ndv/hll/TestHyperLogLog.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hive.common.ndv.hll;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog.EncodingType;
 import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
@@ -49,27 +50,27 @@
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
     double delta4 = threshold * (4*size) / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // new merge
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
-    // valid merge -- register set size gets bigger (also 4k items
+    // valid merge -- register set size gets bigger (also 4k items
     hll.merge(hll4);
-    assertEquals((double) 4 * size, (double) hll.count(), delta4);
+    assertEquals((double) 4 * size, hll.count(), delta4);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -95,27 +96,27 @@
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
     double delta4 = threshold * (4*size) / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // new merge
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // valid merge -- register set size gets bigger & dense automatically
     hll.merge(hll4);
-    assertEquals((double) 4 * size, (double) hll.count(), delta4);
+    assertEquals((double) 4 * size, hll.count(), delta4);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -140,27 +141,27 @@
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // sparse-sparse merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // sparse-dense merge
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should convert hll2 to DENSE
     hll2.merge(hll4);
-    assertEquals((double) 2 * size, (double) hll2.count(), delta);
+    assertEquals((double) 2 * size, hll2.count(), delta);
     assertEquals(EncodingType.DENSE, hll2.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -185,27 +186,27 @@
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // sparse-sparse merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // sparse-dense merge
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should convert hll3 to DENSE
     hll3.merge(hll4);
-    assertEquals((double) 2 * size, (double) hll3.count(), delta);
+    assertEquals((double) 2 * size, hll3.count(), delta);
     assertEquals(EncodingType.DENSE, hll3.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -231,27 +232,27 @@
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
-    assertEquals((double) size, (double) hll2.count(), delta);
+    assertEquals(size, hll.count(), delta);
+    assertEquals(size, hll2.count(), delta);
 
     // sparse-sparse merge
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // merge should update registers and hence the count
     hll.merge(hll2);
-    assertEquals((double) 2 * size, (double) hll.count(), delta);
+    assertEquals((double) 2 * size, hll.count(), delta);
     assertEquals(EncodingType.SPARSE, hll.getEncoding());
 
     // sparse-sparse overload to dense
     hll.merge(hll3);
-    assertEquals((double) 3 * size, (double) hll.count(), delta);
+    assertEquals((double) 3 * size, hll.count(), delta);
     assertEquals(EncodingType.DENSE, hll.getEncoding());
 
     // merge should convert hll2 to DENSE
     hll2.merge(hll4);
-    assertEquals((double) 2 * size, (double) hll2.count(), delta);
+    assertEquals((double) 2 * size, hll2.count(), delta);
     assertEquals(EncodingType.DENSE, hll2.getEncoding());
 
     // invalid merge -- smaller register merge to bigger
@@ -268,7 +269,7 @@
     }
     double threshold = size > 40000 ? longRangeTolerance : shortRangeTolerance;
     double delta = threshold * size / 100;
-    assertEquals((double) size, (double) hll.count(), delta);
+    assertEquals(size, hll.count(), delta);
   }
 
   @Test
@@ -296,7 +297,7 @@
             .squash(small.getNumRegisterIndexBits());
         assertEquals(small.count(), mush.count(), 0);
         double delta = Math.ceil(small.getStandardError()*size);
-        assertEquals((double) size, (double) mush.count(), delta);
+        assertEquals(size, mush.count(), delta);
       }
     }
   }
@@ -316,7 +317,7 @@
     }
 
     p14HLL.squash(p10HLL.getNumRegisterIndexBits());
-    assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 100.0);
+    assertEquals(size, p14HLL.count(), longRangeTolerance * size / 100.0);
   }
 
   @Test
@@ -333,6 +334,26 @@
     }
 
     p14HLL.squash(p10HLL.getNumRegisterIndexBits());
-    assertEquals((double) size, p14HLL.count(), longRangeTolerance * size / 100.0);
+    assertEquals(size, p14HLL.count(), longRangeTolerance * size / 100.0);
   }
+
+  @Test
+  public void testAbleToRetainAccuracyUpToSwitchThreshold() {
+    int maxThreshold = HyperLogLog.builder().setSizeOptimized().build().getEncodingSwitchThreshold();
+    testRetainAccuracy(70);
+    testRetainAccuracy(maxThreshold / 2);
+    testRetainAccuracy(maxThreshold);
+  }
+
+  private void testRetainAccuracy(int numElements) {
+    HyperLogLog h = HyperLogLog.builder().setSizeOptimized().build();
+    assertTrue(numElements <= h.getEncodingSwitchThreshold());
+    for (int ia = 0; ia <= 10; ia++) {
+      for (int i = 1; i <= numElements; i++) {
+        h.addLong(i);
+      }
+    }
+    assertEquals(numElements, h.estimateNumDistinctValues());
+  }
+
 }
diff --git standalone-metastore/pom.xml standalone-metastore/pom.xml
index 0fa6389..054788a 100644
--- standalone-metastore/pom.xml
+++ standalone-metastore/pom.xml
@@ -104,6 +104,7 @@
     4.2.0
     3.5.5
     8.1.1
+    <fastutil.version>8.3.1</fastutil.version>
     you-must-set-this-to-run-thrift
@@ -315,6 +316,11 @@
       <artifactId>cron-utils</artifactId>
       <version>${cron-utils.version}</version>
     </dependency>
+    <dependency>
+      <groupId>it.unimi.dsi</groupId>
+      <artifactId>fastutil</artifactId>
+      <version>${fastutil.version}</version>
+    </dependency>
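[The new fastutil dependency exists for the Int2ByteOpenHashMap that now backs the sparse register: unlike a TreeMap<Integer, Byte>, it stores primitive int keys and byte values directly, so the hot add() path avoids per-insert boxing and tree rebalancing. A minimal sketch of the fastutil calls this patch relies on:

    import it.unimi.dsi.fastutil.ints.Int2ByteMap;
    import it.unimi.dsi.fastutil.ints.Int2ByteOpenHashMap;

    public class FastutilSketch {
      public static void main(String[] args) {
        Int2ByteMap sparse = new Int2ByteOpenHashMap();
        sparse.put(42, (byte) 3);   // primitive overload, no Integer/Byte boxing
        byte nr = sparse.get(42);   // primitive get; returns 0 when the key is absent
        System.out.println(nr + " / " + sparse.size());
      }
    }

One trade-off worth keeping in mind: TreeMap iterated its entries in key order, while Int2ByteOpenHashMap does not, so any consumer of getSparseMap() that depended on sorted iteration (for example, a serializer producing canonical output) would now observe a different entry order.]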