diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java new file mode 100644 index 0000000000..0d70cec6fc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyFilter.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec; + +import java.util.Comparator; +import java.util.PriorityQueue; + +/** + * This class keeps track of the keys seen so far while TopNKeyOperator is running. + * Used in {@link TopNKeyOperator} and {@link org.apache.hadoop.hive.ql.exec.vector.VectorTopNKeyOperator}. + * @param <T> - Type of {@link KeyWrapper}. Each key is stored in a KeyWrapper instance. + */ +public class TopNKeyFilter<T extends KeyWrapper> { + private final PriorityQueue<T> priorityQueue; + private final int topN; + + public TopNKeyFilter(int topN, Comparator<T> comparator) { + this.priorityQueue = new PriorityQueue<>(topN + 1, comparator); + this.topN = topN; + } + + public boolean canForward(T kw) { + if (!priorityQueue.contains(kw)) { + priorityQueue.offer((T) kw.copyKey()); + } + if (priorityQueue.size() > topN) { + priorityQueue.poll(); + } + + return priorityQueue.contains(kw); + } + + public void clear() { + priorityQueue.clear(); + } +}
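The filter above is the core of the patch: a bounded priority queue of topN + 1 elements whose comparator must rank the first key to evict at the head, so canForward answers "is this key still among the current top n?" and deduplicates by checking contains before offering a defensive copy of the key. A minimal standalone sketch of the same mechanics, specialized to plain Integer keys (all names below are illustrative, not part of the patch):

import java.util.Comparator;
import java.util.PriorityQueue;

/** Minimal stand-in for TopNKeyFilter, specialized to Integer keys. */
final class BoundedTopNFilter {
  private final PriorityQueue<Integer> queue;
  private final int topN;

  // For ORDER BY ... ASC LIMIT n we keep the n smallest keys, so the queue
  // head (the element poll() evicts) must be the largest: reversed order.
  BoundedTopNFilter(int topN) {
    this.queue = new PriorityQueue<>(topN + 1, Comparator.<Integer>naturalOrder().reversed());
    this.topN = topN;
  }

  boolean canForward(int key) {
    if (!queue.contains(key)) {
      queue.offer(key);           // first sighting of this key
    }
    if (queue.size() > topN) {
      queue.poll();               // evict the current worst key
    }
    return queue.contains(key);   // still competitive?
  }

  public static void main(String[] args) {
    BoundedTopNFilter f = new BoundedTopNFilter(2);
    System.out.println(f.canForward(8)); // true  -> {8}
    System.out.println(f.canForward(4)); // true  -> {4, 8}
    System.out.println(f.canForward(9)); // false -> 9 is offered, then evicted
    System.out.println(f.canForward(8)); // true  -> duplicates of a kept key pass
    System.out.println(f.canForward(3)); // true  -> 8 is evicted, {3, 4}
  }
}

Note that PriorityQueue.contains is a linear scan, so each row costs O(topN) comparisons on top of the O(log topN) heap operations; for the small n of typical LIMIT queries that is a reasonable trade.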
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java index 4734824b73..e496fd8d69 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -29,7 +29,6 @@ import java.io.Serializable; import java.util.Comparator; -import java.util.PriorityQueue; import static org.apache.hadoop.hive.ql.plan.api.OperatorType.TOPNKEY; @@ -40,11 +39,7 @@ private static final long serialVersionUID = 1L; - // Maximum number of keys to hold - private transient int topN; - - // Priority queue that holds occurred keys - private transient PriorityQueue<KeyWrapper> priorityQueue; + private transient TopNKeyFilter<KeyWrapper> topNKeyFilter; private transient KeyWrapper keyWrapper; @@ -80,8 +75,6 @@ public int compare(KeyWrapper key1, KeyWrapper key2) { protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); - this.topN = conf.getTopN(); - String columnSortOrder = conf.getColumnSortOrder(); boolean[] columnSortOrderIsDesc = new boolean[columnSortOrder.length()]; for (int i = 0; i < columnSortOrderIsDesc.length; i++) { @@ -107,7 +100,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { standardKeyObjectInspectors[i] = standardKeyFields[i].initialize(standardObjInspector); } - priorityQueue = new PriorityQueue<>(topN + 1, new TopNKeyOperator.KeyWrapperComparator( + this.topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), new TopNKeyOperator.KeyWrapperComparator( standardKeyObjectInspectors, standardKeyObjectInspectors, columnSortOrderIsDesc)); KeyWrapperFactory keyWrapperFactory = new KeyWrapperFactory(keyFields, keyObjectInspectors, @@ -117,28 +110,16 @@ protected void initializeOp(Configuration hconf) throws HiveException { @Override public void process(Object row, int tag) throws HiveException { - if (canProcess(row, tag)) { - forward(row, outputObjInspector); - } - } - - protected boolean canProcess(Object row, int tag) throws HiveException { keyWrapper.getNewKey(row, inputObjInspectors[tag]); keyWrapper.setHashKey(); - - if (!priorityQueue.contains(keyWrapper)) { - priorityQueue.offer(keyWrapper.copyKey()); - } - if (priorityQueue.size() > topN) { - priorityQueue.poll(); + if (topNKeyFilter.canForward(keyWrapper)) { + forward(row, outputObjInspector); } - - return priorityQueue.contains(keyWrapper); } @Override protected final void closeOp(boolean abort) throws HiveException { - priorityQueue.clear(); + topNKeyFilter.clear(); super.closeOp(abort); }
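With the queue logic extracted into TopNKeyFilter, process() reduces to compute key, ask the filter, forward. What remains operator-specific is translating the plan's '+'/'-' columnSortOrder string into per-column directions for KeyWrapperComparator. A hedged sketch of that translation with simplified types (int[] keys instead of KeyWrapper; the class and method names are illustrative):

import java.util.Comparator;

final class SortOrderDemo {
  // Mirrors the initializeOp loop: '-' marks a descending key column.
  static boolean[] parseSortOrder(String columnSortOrder) {
    boolean[] isDesc = new boolean[columnSortOrder.length()];
    for (int i = 0; i < isDesc.length; i++) {
      isDesc[i] = columnSortOrder.charAt(i) == '-';
    }
    return isDesc;
  }

  // Composes one comparator per key column, left to right.
  static Comparator<int[]> keyComparator(boolean[] isDesc) {
    Comparator<int[]> result = null;
    for (int i = 0; i < isDesc.length; i++) {
      final int col = i;
      Comparator<int[]> byCol = Comparator.comparingInt(k -> k[col]);
      if (isDesc[col]) {
        byCol = byCol.reversed();
      }
      result = (result == null) ? byCol : result.thenComparing(byCol);
    }
    return result;
  }

  public static void main(String[] args) {
    Comparator<int[]> cmp = keyComparator(parseSortOrder("+-"));
    // (1, 9) sorts before (1, 2) because the second column is descending.
    System.out.println(cmp.compare(new int[]{1, 9}, new int[]{1, 2}) < 0); // true
  }
}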
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java index c80bc804a2..ca0c169e14 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,31 +21,32 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.TopNKeyFilter; import org.apache.hadoop.hive.ql.exec.TopNKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase; +import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TopNKeyDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorTopNKeyDesc; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; /** * VectorTopNKeyOperator passes rows that contain top N keys only. */ -public class VectorTopNKeyOperator extends TopNKeyOperator implements VectorizationOperator { +public class VectorTopNKeyOperator extends Operator<TopNKeyDesc> implements VectorizationOperator { private static final long serialVersionUID = 1L; private VectorTopNKeyDesc vectorDesc; private VectorizationContext vContext; - // Extract row - private transient Object[] extractedRow; - private transient VectorExtractRow vectorExtractRow; - // Batch processing private transient int[] temporarySelected; + private transient VectorHashKeyWrapperBatch keyWrappersBatch; + private transient TopNKeyFilter<VectorHashKeyWrapperBase> topNKeyFilter; public VectorTopNKeyOperator(CompilationOpContext ctx, OperatorDesc conf, VectorizationContext vContext, VectorDesc vectorDesc) { @@ -70,17 +71,17 @@ public VectorTopNKeyOperator(CompilationOpContext ctx) { protected void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); - VectorExpression.doTransientInit(vectorDesc.getKeyExpressions(), hconf); - for (VectorExpression keyExpression : vectorDesc.getKeyExpressions()) { + VectorExpression[] keyExpressions = vectorDesc.getKeyExpressions(); + VectorExpression.doTransientInit(keyExpressions, hconf); + for (VectorExpression keyExpression : keyExpressions) { keyExpression.init(hconf); } - vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init((StructObjectInspector) inputObjInspectors[0], - vContext.getProjectedColumns()); - extractedRow = new Object[vectorExtractRow.getCount()]; temporarySelected = new int [VectorizedRowBatch.DEFAULT_SIZE]; + + String columnSortOrder = conf.getColumnSortOrder(); + keyWrappersBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions); + this.topNKeyFilter = new TopNKeyFilter<>(conf.getTopN(), keyWrappersBatch.getComparator(columnSortOrder)); } @Override @@ -99,6 +100,9 @@ public void process(Object data, int tag) throws HiveException { keyExpression.evaluate(batch); } + keyWrappersBatch.evaluateBatch(batch); + VectorHashKeyWrapperBase[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers(); + // Filter rows with top n keys int size = 0; int[] selected = new int[batch.selected.length]; @@ -110,11 +114,8 @@ j = i; } - // Get keys - vectorExtractRow.extractRow(batch, j, extractedRow); - // Select a row in the priority queue - if (canProcess(extractedRow, tag)) { + if (topNKeyFilter.canForward(keyWrappers[i])) { selected[size++] = j; } }
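Instead of extracting every row into Java objects through VectorExtractRow, the vectorized path now asks VectorHashKeyWrapperBatch for one key wrapper per logical row and rebuilds the batch's selected array: the wrapper is looked up by the logical position i, while the surviving physical row index j is what gets recorded. A self-contained sketch of that selected-array pattern (illustrative names, not the patch's API):

import java.util.function.IntPredicate;

final class BatchFilterDemo {
  // Returns the new number of selected rows; surviving physical row indices
  // are written to selectedOut in ascending logical order.
  static int filterSelected(boolean selectedInUse, int[] selectedIn, int size,
                            IntPredicate keep, int[] selectedOut) {
    int newSize = 0;
    for (int i = 0; i < size; i++) {
      int j = selectedInUse ? selectedIn[i] : i; // map logical to physical row
      if (keep.test(j)) {
        selectedOut[newSize++] = j;
      }
    }
    return newSize;
  }

  public static void main(String[] args) {
    int[] out = new int[1024];
    // Keep even physical rows out of 6 rows with no prior selection in use.
    int n = filterSelected(false, null, 6, j -> j % 2 == 0, out);
    System.out.println(n); // 3, rows {0, 2, 4}
  }
}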
@@ -154,4 +155,44 @@ public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveE op.setNextVectorBatchGroupStatus(isLastGroupBatch); } } + + @Override + public String getName() { + return TopNKeyOperator.getOperatorName(); + } + + @Override + public OperatorType getType() { + return OperatorType.TOPNKEY; + } + + @Override + protected void closeOp(boolean abort) throws HiveException { + topNKeyFilter.clear(); + super.closeOp(abort); + } + + // Because a TopNKeyOperator works like a FilterOperator with a top-n key condition, it has the + // same optimizer-facing properties as FilterOperator. The following methods match FilterOperator: + // supportSkewJoinOptimization, columnNamesRowResolvedCanBeObtained, + // supportAutomaticSortMergeJoin, and supportUnionRemoveOptimization. + @Override + public boolean supportSkewJoinOptimization() { + return true; + } + + @Override + public boolean columnNamesRowResolvedCanBeObtained() { + return true; + } + + @Override + public boolean supportAutomaticSortMergeJoin() { + return true; + } + + @Override + public boolean supportUnionRemoveOptimization() { + return true; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java index dd31991d03..442f100c20 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBatch.java @@ -18,6 +18,10 @@ package org.apache.hadoop.hive.ql.exec.vector.wrapper; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -1072,5 +1076,17 @@ public int getVariableSize(int batchSize) { } return variableSize; } + + public Comparator<VectorHashKeyWrapperBase> getComparator(String columnSortOrder) { + VectorHashKeyWrapperGeneralComparator comparator = + new VectorHashKeyWrapperGeneralComparator(columnVectorTypes.length); + for (int i = 0; i < columnVectorTypes.length; ++i) { + final int columnTypeSpecificIndex = columnTypeSpecificIndices[i]; + ColumnVector.Type columnVectorType = columnVectorTypes[i]; + comparator.addColumnComparator(i, columnTypeSpecificIndex, columnVectorType, columnSortOrder.charAt(i)); + } + + return comparator; + } }
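One subtlety in getComparator is the two-level indexing: columnVectorTypes is consulted by the key position i, while columnTypeSpecificIndices[i] locates key i inside the wrapper's per-type value arrays. Indexing the type array with the type-specific index would conflate, say, the first LONG key with the first DOUBLE key, because each type's indices restart at zero. A small sketch of the layout (hypothetical names and a reduced type enum):

final class TypeSpecificIndexDemo {
  enum Type { LONG, DOUBLE }

  public static void main(String[] args) {
    // Key layout for (a bigint, b double, c bigint):
    Type[] columnVectorTypes = { Type.LONG, Type.DOUBLE, Type.LONG };
    int[] columnTypeSpecificIndices = { 0, 0, 1 }; // a -> longs[0], b -> doubles[0], c -> longs[1]
    long[] longValues = { 42L, 7L };   // values of a and c
    double[] doubleValues = { 3.14 };  // value of b

    for (int i = 0; i < columnVectorTypes.length; i++) {
      int t = columnTypeSpecificIndices[i];
      // The type array is indexed by key position i, never by t.
      if (columnVectorTypes[i] == Type.LONG) {
        System.out.println("key " + i + " = " + longValues[t]);
      } else {
        System.out.println("key " + i + " = " + doubleValues[t]);
      }
    }
  }
}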
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java new file mode 100644 index 0000000000..95218a26b7 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperGeneralComparator.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector.wrapper; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; + +/** + * An implementation of {@link Comparator} to compare {@link VectorHashKeyWrapperBase} instances. + */ +public class VectorHashKeyWrapperGeneralComparator implements Comparator<VectorHashKeyWrapperBase> { + + /** + * Compares {@link VectorHashKeyWrapperBase} instances by a single column. + */ + private static class VectorHashKeyWrapperBaseComparator implements Comparator<VectorHashKeyWrapperBase> { + + private final int keyIndex; + private final Comparator<VectorHashKeyWrapperBase> comparator; + + VectorHashKeyWrapperBaseComparator(int keyIndex, Comparator<VectorHashKeyWrapperBase> comparator) { + this.keyIndex = keyIndex; + this.comparator = comparator; + } + + @Override + public int compare(VectorHashKeyWrapperBase o1, VectorHashKeyWrapperBase o2) { + boolean isNull1 = o1.isNull(keyIndex); + boolean isNull2 = o2.isNull(keyIndex); + + // TODO: implement support for NULLS FIRST/LAST + if (isNull1 && isNull2) { + return 0; + } + if (isNull1) { + return -1; + } + if (isNull2) { + return 1; + } + return comparator.compare(o1, o2); + } + } + + private final List<Comparator<VectorHashKeyWrapperBase>> comparators; + + public VectorHashKeyWrapperGeneralComparator(int numberOfColumns) { + this.comparators = new ArrayList<>(numberOfColumns); + } + + public void addColumnComparator( + int keyIndex, int columnTypeSpecificIndex, ColumnVector.Type columnVectorType, char sortOrder) { + Comparator<VectorHashKeyWrapperBase> comparator; + switch (columnVectorType) { + case LONG: + case DECIMAL_64: + comparator = (o1, o2) -> + Long.compare(o1.getLongValue(columnTypeSpecificIndex), o2.getLongValue(columnTypeSpecificIndex)); + break; + case DOUBLE: + comparator = (o1, o2) -> Double.compare( + o1.getDoubleValue(columnTypeSpecificIndex), o2.getDoubleValue(columnTypeSpecificIndex)); + break; + case BYTES: + comparator = (o1, o2) -> StringExpr.compare( + o1.getBytes(columnTypeSpecificIndex), + o1.getByteStart(columnTypeSpecificIndex), + o1.getByteLength(columnTypeSpecificIndex), + o2.getBytes(columnTypeSpecificIndex), + o2.getByteStart(columnTypeSpecificIndex), + o2.getByteLength(columnTypeSpecificIndex)); + break; + case DECIMAL: + comparator = (o1, o2) -> + o1.getDecimal(columnTypeSpecificIndex).compareTo(o2.getDecimal(columnTypeSpecificIndex)); + break; + case TIMESTAMP: + comparator = (o1, o2) -> + o1.getTimestamp(columnTypeSpecificIndex).compareTo(o2.getTimestamp(columnTypeSpecificIndex)); + break; + case INTERVAL_DAY_TIME: + comparator = (o1, o2) -> o1.getIntervalDayTime(columnTypeSpecificIndex) + .compareTo(o2.getIntervalDayTime(columnTypeSpecificIndex)); + break; + default: + throw new RuntimeException("Unexpected column vector type " + columnVectorType); + } + + comparators.add( + new VectorHashKeyWrapperBaseComparator(keyIndex, sortOrder == '-' ? comparator : comparator.reversed())); + } + + @Override + public int compare(VectorHashKeyWrapperBase o1, VectorHashKeyWrapperBase o2) { + for (Comparator<VectorHashKeyWrapperBase> comparator : comparators) { + int c = comparator.compare(o1, o2); + if (c != 0) { + return c; + } + } + return 0; + } +}
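The inner VectorHashKeyWrapperBaseComparator short-circuits on nulls before delegating to the type-specific comparator, and it does so outside the sortOrder reversal, so nulls currently always compare lowest regardless of direction; that is what the TODO about NULLS FIRST/LAST refers to. The observable per-column effect matches Comparator.nullsFirst, as this small sketch shows:

import java.util.Arrays;
import java.util.Comparator;

final class NullOrderDemo {
  public static void main(String[] args) {
    Integer[] keys = { 7, null, 4, null, 8 };
    Arrays.sort(keys, Comparator.nullsFirst(Comparator.<Integer>naturalOrder()));
    System.out.println(Arrays.toString(keys)); // [null, null, 4, 7, 8]
  }
}

The ternary in addColumnComparator is easy to misread: '-' (descending) keeps the natural comparator and '+' (ascending) reverses it, because TopNKeyFilter evicts from the queue head and the head must hold the key that loses first, which under an ascending sort is the largest key.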
diff --git ql/src/test/queries/clientpositive/vector_topnkey.q ql/src/test/queries/clientpositive/vector_topnkey.q index e1b7d26afe..d4bc9b4fab 100644 --- ql/src/test/queries/clientpositive/vector_topnkey.q +++ ql/src/test/queries/clientpositive/vector_topnkey.q @@ -1,4 +1,3 @@ ---! qt:dataset:src set hive.mapred.mode=nonstrict; set hive.vectorized.execution.enabled=true; set hive.optimize.topnkey=true; @@ -14,17 +13,31 @@ set hive.tez.dynamic.partition.pruning=true; set hive.stats.fetch.column.stats=true; set hive.cbo.enable=true; -explain vectorization detail -SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; - -SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; - -explain vectorization detail -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; - -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; - -explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; - -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; +CREATE TABLE t_test( + a int, + b int, + c double, + d varchar(50) +); + +INSERT INTO t_test VALUES +(NULL, NULL, NULL, NULL), +(8, 9, 2.0, 'one'), (8, 9, 2.0, 'one'), +(4, 2, 3.3, 'two'), +(NULL, NULL, NULL, NULL), +(NULL, NULL, NULL, NULL), +(6, 2, 1.8, 'three'), +(7, 8, 4.5, 'four'), (7, 8, 4.5, 'four'), (7, 8, 4.5, 'four'), +(4, 1, 2.0, 'five'), (4, 1, 2.0, 'five'), (4, 1, 2.0, 'five'), +(NULL, NULL, NULL, NULL); + +EXPLAIN VECTORIZATION DETAIL +SELECT a FROM t_test GROUP BY a ORDER BY a LIMIT 3; + +SELECT a FROM t_test GROUP BY a ORDER BY a LIMIT 3; +--SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3; +--SELECT a, b FROM t_test GROUP BY a, b ORDER BY a DESC, b LIMIT 3; +--SELECT a, c FROM t_test GROUP BY a, c ORDER BY a, c LIMIT 3; +--SELECT d, c FROM t_test GROUP BY d, c ORDER BY d, c LIMIT 3; + +DROP TABLE t_test; \ No newline at end of file diff --git ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ql/src/test/results/clientpositive/llap/vector_topnkey.q.out index d859270ff0..de370a81ca 100644 --- ql/src/test/results/clientpositive/llap/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/llap/vector_topnkey.q.out @@ -1,212 +1,60 @@ -PREHOOK: query: explain vectorization detail -SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: query: CREATE TABLE t_test( + a int, + b int, + c double, + d varchar(50) +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test( + a int, + b int, + c double, + d varchar(50) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_test +PREHOOK: query: INSERT INTO t_test VALUES +(NULL, NULL, NULL, NULL), +(8, 9, 2.0, 'one'), (8, 9, 2.0, 'one'), +(4, 2, 3.3, 'two'), +(NULL, NULL, NULL, NULL), +(NULL, NULL, NULL, NULL), +(6, 2, 1.8, 'three'), +(7, 8, 4.5, 'four'), (7, 8, 4.5, 'four'), (7, 8, 4.5, 'four'), +(4, 1, 2.0, 'five'), (4, 1, 2.0, 'five'), (4, 1, 2.0, 'five'), +(NULL, NULL, NULL, NULL) PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked
pattern was here #### -POSTHOOK: query: explain vectorization detail -SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES +(NULL, NULL, NULL, NULL), +(8, 9, 2.0, 'one'), (8, 9, 2.0, 'one'), +(4, 2, 3.3, 'two'), +(NULL, NULL, NULL, NULL), +(NULL, NULL, NULL, NULL), +(6, 2, 1.8, 'three'), +(7, 8, 4.5, 'four'), (7, 8, 4.5, 'four'), (7, 8, 4.5, 'four'), +(4, 1, 2.0, 'five'), (4, 1, 2.0, 'five'), (4, 1, 2.0, 'five'), +(NULL, NULL, NULL, NULL) POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4] - selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:string - native: true - Group By Operator - aggregations: sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 4:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:bigint - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [string, bigint] - Reducer 2 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: z - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY._col0:string, VALUE._col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint - className: VectorGroupByOperator - groupByMode: MERGEPARTIAL - keyExpressions: col 0:string - native: false - vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0] - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumns: 0:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:bigint - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reducer 3 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: z - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 
+POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.a SCRIPT [] +POSTHOOK: Lineage: t_test.b SCRIPT [] +POSTHOOK: Lineage: t_test.c SCRIPT [] +POSTHOOK: Lineage: t_test.d SCRIPT [] +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT a FROM t_test GROUP BY a ORDER BY a LIMIT 3 PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: default@t_test #### A masked pattern was here #### -POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT a FROM t_test GROUP BY a ORDER BY a LIMIT 3 POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 0 -10 10 -100 200 -103 206 -104 208 -PREHOOK: query: explain vectorization detail -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain vectorization detail -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src +POSTHOOK: Input: default@t_test #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true @@ -228,51 +76,51 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + alias: t_test + Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + vectorizationSchemaColumns: [0:a:int, 1:b:int, 2:c:double, 3:d:varchar(50), 4:ROW__ID:struct] Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: a (type: int) + outputColumnNames: a Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: + - keys: key (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 + keys: a (type: int) + Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + top n: 3 Top N Key Vectorization: className: VectorTopNKeyOperator - keyExpressions: col 0:string + keyExpressions: col 0:int native: true Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:string + keyExpressions: col 0:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] - keys: key (type: string) - minReductionHashAggr: 0.5 + keys: a (type: int) + minReductionHashAggr: 0.64285713 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: no inputs @@ -286,9 +134,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 + dataColumnCount: 4 includeColumns: [0] - dataColumns: key:string, value:string + dataColumns: a:int, b:int, c:double, d:varchar(50) partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 @@ -303,7 +151,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: KEY._col0:string + dataColumns: KEY._col0:int partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: @@ -311,23 +159,23 @@ STAGE PLANS: Group By Vectorization: className: VectorGroupByOperator groupByMode: MERGEPARTIAL - keyExpressions: col 0:string + keyExpressions: col 0:int native: false vectorProcessingMode: MERGE_PARTIAL projectedOutputColumnNums: [] - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumns: 0:string + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -341,245 +189,30 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) + expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Limit Vectorization: - className: VectorLimitOperator - native: true - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink - -PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 
5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 -10 -100 -103 -104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:string) - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: no inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 4 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: 
VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 0:string) - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:string - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: key:string, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string) - MergeJoin Vectorization: - enabled: false - enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 3 - Execution mode: vectorized, llap - Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: z - reduceColumnSortOrder: + - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit - Number of rows: 5 + Number of 
rows: 3 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -587,20 +220,26 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator - limit: 5 + limit: 3 Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT a FROM t_test GROUP BY a ORDER BY a LIMIT 3 PREHOOK: type: QUERY -PREHOOK: Input: default@src +PREHOOK: Input: default@t_test #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT a FROM t_test GROUP BY a ORDER BY a LIMIT 3 POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +4 +6 +7 +PREHOOK: query: DROP TABLE t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test +POSTHOOK: Output: default@t_test diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectComparator.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectComparator.java new file mode 100644 index 0000000000..9fb7787118 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectComparator.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.Comparator; + +import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.NullValueOption; + +/** + * This class wraps the ObjectInspectorUtils.compare method and implements java.util.Comparator. 
+ */ +public class ObjectComparator implements Comparator<Object> { + + private final ObjectInspector objectInspector1; + private final ObjectInspector objectInspector2; + private final NullValueOption nullSortOrder; + private final MapEqualComparer mapEqualComparer = new FullMapEqualComparer(); + + public ObjectComparator(ObjectInspector objectInspector1, ObjectInspector objectInspector2, + NullValueOption nullSortOrder) { + this.objectInspector1 = objectInspector1; + this.objectInspector2 = objectInspector2; + this.nullSortOrder = nullSortOrder; + } + + @Override + public int compare(Object o1, Object o2) { + return ObjectInspectorUtils.compare(o1, objectInspector1, o2, objectInspector2, mapEqualComparer, nullSortOrder); + } +}
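A hypothetical usage sketch of the new comparator, sorting standard Java objects with a primitive ObjectInspector; it assumes, per the enum's use as a null sort order here, that NullValueOption.MINVALUE ranks nulls below all non-null values:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectComparator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.NullValueOption;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ObjectComparatorDemo {
  public static void main(String[] args) {
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    List<Object> values = Arrays.asList(3, null, 1, 2);
    // Both sides use the same inspector; MINVALUE sorts nulls first.
    values.sort(new ObjectComparator(intOI, intOI, NullValueOption.MINVALUE));
    System.out.println(values); // [null, 1, 2, 3]
  }
}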