diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 3295d1dbc5..ff742f31f6 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2213,6 +2213,8 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "If the skew information is correctly stored in the metadata, hive.optimize.skewjoin.compiletime\n" +
         "would change the query plan to take care of it, and hive.optimize.skewjoin will be a no-op."),
+    HIVE_OPTIMIZE_TOPNKEY("hive.optimize.topnkey", false, "Whether to enable the top-n key optimizer."),
+
     HIVE_SHARED_WORK_OPTIMIZATION("hive.optimize.shared.work", true,
         "Whether to enable shared work optimizer. The optimizer finds scan operator over the same table\n" +
         "and follow-up operators in the query plan and merges them if they meet some preconditions. Tez only."),
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 6a70a4a6bd..c1d6e83dda 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -45,9 +45,11 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\
 minitez.query.files.shared=delete_orig_table.q,\
   orc_merge12.q,\
   orc_vectorization_ppd.q,\
+  topnkey.q,\
   update_orig_table.q,\
   vector_join_part_col_char.q,\
-  vector_non_string_partition.q
+  vector_non_string_partition.q,\
+  vector_topnkey.q
 
 # NOTE: Add tests to minitez only if it is very
 # specific to tez and cannot be added to minillap.
@@ -227,6 +229,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   subquery_exists.q,\
   subquery_in.q,\
   temp_table.q,\
+  topnkey.q,\
   union2.q,\
   union3.q,\
   union4.q,\
@@ -334,6 +337,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_row__id.q,\
   vector_string_concat.q,\
   vector_struct_in.q,\
+  vector_topnkey.q,\
   vector_udf_character_length.q,\
   vector_udf_octet_length.q,\
   vector_varchar_4.q,\
diff --git ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
index a002348013..f8328be25c 100644
--- ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
+++ ql/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
@@ -37,7 +37,8 @@
   ORCFILEMERGE(22),
   RCFILEMERGE(23),
   MERGEJOIN(24),
-  SPARKPRUNINGSINK(25);
+  SPARKPRUNINGSINK(25),
+  TOPNKEY(26);
 
   private final int value;
@@ -110,6 +111,8 @@ public static OperatorType findByValue(int value) {
         return MERGEJOIN;
       case 25:
         return SPARKPRUNINGSINK;
+      case 26:
+        return TOPNKEY;
       default:
         return null;
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
index 3c7f0b78c2..9f723e2820 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java
@@ -161,13 +161,13 @@ private void deepCopyElements(Object[] keys,
     }
   }
 
-  transient Object[] singleEleArray = new Object[1];
   transient StringObjectInspector soi_new, soi_copy;
 
   class TextKeyWrapper extends KeyWrapper {
     int hashcode;
     Object key;
     boolean isCopy;
+    transient Object[] singleEleArray = new Object[1];
 
     public TextKeyWrapper(boolean isCopy) {
       this(-1, null, isCopy);
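The new flag above defaults to false, so the optimization is opt-in. For anyone poking at it from Java rather than a .q file, it behaves like any other HiveConf boolean; a minimal sketch (the driver class is illustrative, not part of the patch):

```java
import org.apache.hadoop.hive.conf.HiveConf;

// Illustrative only: toggles and reads the flag added above.
public class TopNKeyConfCheck {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_TOPNKEY, true);
    // TezCompiler consults this flag before running the new optimization passes.
    System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_TOPNKEY)); // true
  }
}
```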
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
index 7bb6590d5e..b61d37ed1d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
@@ -38,6 +38,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSparkHashTableSinkOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorSparkPartitionPruningSinkOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorTopNKeyOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkCommonOperator;
 import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator;
@@ -76,6 +77,7 @@
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
 import org.apache.hadoop.hive.ql.plan.UDTFDesc;
 import org.apache.hadoop.hive.ql.plan.UnionDesc;
 import org.apache.hadoop.hive.ql.plan.VectorDesc;
@@ -126,6 +128,7 @@
     opvec.put(OrcFileMergeDesc.class, OrcFileMergeOperator.class);
     opvec.put(CommonMergeJoinDesc.class, CommonMergeJoinOperator.class);
     opvec.put(ListSinkDesc.class, ListSinkOperator.class);
+    opvec.put(TopNKeyDesc.class, TopNKeyOperator.class);
   }
 
   static {
@@ -143,6 +146,7 @@
     vectorOpvec.put(LimitDesc.class, VectorLimitOperator.class);
     vectorOpvec.put(PTFDesc.class, VectorPTFOperator.class);
     vectorOpvec.put(SparkHashTableSinkDesc.class, VectorSparkHashTableSinkOperator.class);
+    vectorOpvec.put(TopNKeyDesc.class, VectorTopNKeyOperator.class);
   }
 
   public static <T extends OperatorDesc> Operator<T> getVectorOperator(
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
new file mode 100644
index 0000000000..0de983ec25
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/TopNKeyOperator.java
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+
+import java.io.Serializable;
+import java.util.Comparator;
+import java.util.PriorityQueue;
+
+import static org.apache.hadoop.hive.ql.plan.api.OperatorType.TOPNKEY;
+
+public class TopNKeyOperator extends Operator<TopNKeyDesc> implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+
+  // Top n
+  private transient int topN;
+  private transient PriorityQueue<KeyWrapper> priorityQueue;
+
+  // Key wrappers
+  private transient KeyWrapper keyWrapper;
+  private transient KeyWrapper standardKeyWrapper;
+
+  // Rows
+  private transient int rowLimit;
+  private transient int rowSize;
+  private transient Object[] rows;
+
+  public TopNKeyOperator() {
+    super();
+  }
+
+  public TopNKeyOperator(CompilationOpContext ctx) {
+    super(ctx);
+  }
+
+  public static class KeyWrapperComparator implements Comparator<KeyWrapper> {
+    private ObjectInspector[] objectInspectors1;
+    private ObjectInspector[] objectInspectors2;
+    private boolean[] columnSortOrderIsDesc;
+
+    public KeyWrapperComparator(ObjectInspector[] objectInspectors1,
+        ObjectInspector[] objectInspectors2, boolean[] columnSortOrderIsDesc) {
+      this.objectInspectors1 = objectInspectors1;
+      this.objectInspectors2 = objectInspectors2;
+      this.columnSortOrderIsDesc = columnSortOrderIsDesc;
+    }
+
+    @Override
+    public int compare(KeyWrapper key1, KeyWrapper key2) {
+      return ObjectInspectorUtils.compare(key1.getKeyArray(), objectInspectors1,
+          key2.getKeyArray(), objectInspectors2, columnSortOrderIsDesc);
+    }
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    this.topN = conf.getTopN();
+
+    String columnSortOrder = conf.getColumnSortOrder();
+    boolean[] columnSortOrderIsDesc = new boolean[columnSortOrder.length()];
+    for (int i = 0; i < columnSortOrderIsDesc.length; i++) {
+      columnSortOrderIsDesc[i] = (columnSortOrder.charAt(i) == '-');
+    }
+
+    ObjectInspector rowInspector = inputObjInspectors[0];
+    outputObjInspector = ObjectInspectorUtils.getStandardObjectInspector(rowInspector);
+
+    // init keyFields
+    int numKeys = conf.getKeyColumns().size();
+    ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[numKeys];
+    ObjectInspector[] keyObjectInspectors = new ObjectInspector[numKeys];
+    ExprNodeEvaluator[] standardKeyFields = new ExprNodeEvaluator[numKeys];
+    ObjectInspector[] standardKeyObjectInspectors = new ObjectInspector[numKeys];
+
+    for (int i = 0; i < numKeys; i++) {
+      ExprNodeDesc key = conf.getKeyColumns().get(i);
+      keyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf);
+      keyObjectInspectors[i] = keyFields[i].initialize(rowInspector);
+      standardKeyFields[i] = ExprNodeEvaluatorFactory.get(key, hconf);
+      standardKeyObjectInspectors[i] = standardKeyFields[i].initialize(outputObjInspector);
+    }
+
+    priorityQueue = new PriorityQueue<>(topN + 1, new TopNKeyOperator.KeyWrapperComparator(
+        standardKeyObjectInspectors, standardKeyObjectInspectors, columnSortOrderIsDesc));
+
+    keyWrapper = new KeyWrapperFactory(keyFields, keyObjectInspectors,
+        standardKeyObjectInspectors).getKeyWrapper();
+    standardKeyWrapper = new KeyWrapperFactory(standardKeyFields, standardKeyObjectInspectors,
+        standardKeyObjectInspectors).getKeyWrapper();
+
+    rowLimit = VectorizedRowBatch.DEFAULT_SIZE;
+    rows = new Object[rowLimit];
+    rowSize = 0;
+  }
+
+  @Override
+  public void process(Object row, int tag) throws HiveException {
+    keyWrapper.getNewKey(row, inputObjInspectors[0]);
+    keyWrapper.setHashKey();
+
+    if (!priorityQueue.contains(keyWrapper)) {
+      priorityQueue.offer(keyWrapper.copyKey());
+    }
+    if (priorityQueue.size() > topN) {
+      priorityQueue.poll();
+    }
+
+    rows[rowSize] = ObjectInspectorUtils.copyToStandardObject(row, inputObjInspectors[0]);
+    rowSize++;
+
+    if (rowSize % rowLimit == 0) {
+      processRows();
+    }
+  }
+
+  private void processRows() throws HiveException {
+    for (int i = 0; i < rowSize; i++) {
+      Object row = rows[i];
+
+      standardKeyWrapper.getNewKey(row, outputObjInspector);
+      standardKeyWrapper.setHashKey();
+
+      if (priorityQueue.contains(standardKeyWrapper)) {
+        forward(row, outputObjInspector);
+      }
+    }
+    priorityQueue.clear();
+    rowSize = 0;
+  }
+
+  @Override
+  protected final void closeOp(boolean abort) throws HiveException {
+    processRows();
+    super.closeOp(abort);
+  }
+
+  @Override
+  public String getName() {
+    return getOperatorName();
+  }
+
+  public static String getOperatorName() {
+    return "TNK";
+  }
+
+  @Override
+  public OperatorType getType() {
+    return TOPNKEY;
+  }
+
+  @Override
+  public boolean supportSkewJoinOptimization() {
+    return true;
+  }
+
+  @Override
+  public boolean columnNamesRowResolvedCanBeObtained() {
+    return true;
+  }
+
+  @Override
+  public boolean supportAutomaticSortMergeJoin() {
+    return true;
+  }
+
+  @Override
+  public boolean supportUnionRemoveOptimization() {
+    return true;
+  }
+}
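The operator above is easier to follow once the queue discipline is isolated: it keeps at most topN distinct keys in a priority queue whose head is the current worst key, buffers up to 1024 rows, and on each flush forwards only the buffered rows whose key is still in the queue. A self-contained sketch of that filtering idea, using plain strings in place of Hive's KeyWrapper machinery (class and method names here are mine, not the patch's):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

// Standalone sketch of the top-n-key filter: keep at most n distinct keys in a
// priority queue ordered so that poll() evicts the worst key, then pass through
// only rows whose key survived. For an ascending ORDER BY the worst key is the
// largest, so the queue is a max-heap (reverse order).
public class TopNKeyFilterSketch {
  public static List<String> filterTopN(List<String> rows, int n) {
    PriorityQueue<String> topKeys = new PriorityQueue<>(n + 1, Comparator.reverseOrder());
    for (String key : rows) {
      if (!topKeys.contains(key)) {  // the operator also dedups keys before offering
        topKeys.offer(key);
      }
      if (topKeys.size() > n) {
        topKeys.poll();              // evict the current worst key
      }
    }
    List<String> result = new ArrayList<>();
    for (String key : rows) {
      if (topKeys.contains(key)) {   // forward only rows carrying a surviving key
        result.add(key);
      }
    }
    return result;
  }

  public static void main(String[] args) {
    System.out.println(filterTopN(Arrays.asList("d", "a", "c", "a", "b", "e"), 2)); // [a, a, b]
  }
}
```

Note that TNK is a data-reduction filter, not an exact LIMIT: because the real operator flushes per 1024-row chunk, it may forward a superset of the final top-n rows, and the downstream ReduceSink/Limit still performs the final truncation.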
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
new file mode 100644
index 0000000000..f811bea202
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
@@ -0,0 +1,292 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Joiner;
+import com.google.common.primitives.Ints;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.TopNKeyOperator;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
+import org.apache.hadoop.hive.ql.plan.VectorDesc;
+import org.apache.hadoop.hive.ql.plan.VectorTopNKeyDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.PriorityQueue;
+import java.util.Properties;
+
+import static org.apache.hadoop.hive.ql.plan.api.OperatorType.TOPNKEY;
+
+public class VectorTopNKeyOperator extends Operator<TopNKeyDesc> implements VectorizationOperator {
+
+  private static final long serialVersionUID = 1L;
+
+  private VectorTopNKeyDesc vectorDesc;
+  private VectorizationContext vContext;
+
+  // Key column info
+  private int[] keyColumnNums;
+  private TypeInfo[] keyTypeInfos;
+
+  // Extract row
+  private transient Object[] singleRow;
+  private transient VectorExtractRow vectorExtractRow;
+
+  // Serialization
+  private transient BinarySortableSerDe binarySortableSerDe;
+  private transient StructObjectInspector keyObjectInspector;
+
+  // Batch processing
+  private transient boolean firstBatch;
+  private transient PriorityQueue<Writable> priorityQueue;
+  private transient int[] temporarySelected;
+
+  public VectorTopNKeyOperator(CompilationOpContext ctx, OperatorDesc conf,
+      VectorizationContext vContext, VectorDesc vectorDesc) {
+
+    this(ctx);
+    this.conf = (TopNKeyDesc) conf;
+    this.vContext = vContext;
+    this.vectorDesc = (VectorTopNKeyDesc) vectorDesc;
+
+    VectorExpression[] keyExpressions = this.vectorDesc.getKeyExpressions();
+    final int numKeys = keyExpressions.length;
+    keyColumnNums = new int[numKeys];
+    keyTypeInfos = new TypeInfo[numKeys];
+
+    for (int i = 0; i < numKeys; i++) {
+      keyColumnNums[i] = keyExpressions[i].getOutputColumnNum();
+      keyTypeInfos[i] = keyExpressions[i].getOutputTypeInfo();
+    }
+  }
+
+  /** Kryo ctor. */
+  @VisibleForTesting
+  public VectorTopNKeyOperator() {
+    super();
+  }
+
+  public VectorTopNKeyOperator(CompilationOpContext ctx) {
+    super(ctx);
+  }
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    super.initializeOp(hconf);
+
+    VectorExpression.doTransientInit(vectorDesc.getKeyExpressions());
+    for (VectorExpression keyExpression : vectorDesc.getKeyExpressions()) {
+      keyExpression.init(hconf);
+    }
+
+    this.firstBatch = true;
+
+    VectorExpression[] keyExpressions = vectorDesc.getKeyExpressions();
+    final int size = keyExpressions.length;
+    ObjectInspector[] fieldObjectInspectors = new ObjectInspector[size];
+
+    for (int i = 0; i < size; i++) {
+      VectorExpression keyExpression = keyExpressions[i];
+      fieldObjectInspectors[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+          keyExpression.getOutputTypeInfo());
+    }
+
+    keyObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
+        this.conf.getKeyColumnNames(), Arrays.asList(fieldObjectInspectors));
+
+    temporarySelected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  }
+
+  @Override
+  public void process(Object data, int tag) throws HiveException {
+    VectorizedRowBatch batch = (VectorizedRowBatch) data;
+
+    // The selected vector represents the selected rows.
+    // Clone the selected vector and back up the original selection state.
+    System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size);
+    int[] selectedBackup = batch.selected;
+    batch.selected = temporarySelected;
+    int sizeBackup = batch.size;
+    boolean selectedInUseBackup = batch.selectedInUse;
+
+    for (VectorExpression keyExpression : vectorDesc.getKeyExpressions()) {
+      keyExpression.evaluate(batch);
+    }
+
+    if (firstBatch) {
+      vectorExtractRow = new VectorExtractRow();
+      vectorExtractRow.init(keyObjectInspector, Ints.asList(keyColumnNums));
+
+      singleRow = new Object[vectorExtractRow.getCount()];
+      Comparator comparator = Comparator.reverseOrder();
+      priorityQueue = new PriorityQueue(comparator);
+
+      try {
+        binarySortableSerDe = new BinarySortableSerDe();
+        Properties properties = new Properties();
+        Joiner joiner = Joiner.on(',');
+        properties.setProperty(serdeConstants.LIST_COLUMNS, joiner.join(conf.getKeyColumnNames()));
+        properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, joiner.join(keyTypeInfos));
+        properties.setProperty(serdeConstants.SERIALIZATION_SORT_ORDER,
+            conf.getColumnSortOrder());
+        binarySortableSerDe.initialize(getConfiguration(), properties);
+      } catch (SerDeException e) {
+        throw new HiveException(e);
+      }
+
+      firstBatch = false;
+    }
+
+    // Clear the priority queue
+    priorityQueue.clear();
+
+    // Get top n keys
+    for (int i = 0; i < batch.size; i++) {
+
+      // Get keys
+      if (batch.selectedInUse) {
+        vectorExtractRow.extractRow(batch, batch.selected[i], singleRow);
+      } else {
+        vectorExtractRow.extractRow(batch, i, singleRow);
+      }
+
+      Writable keysWritable;
+      try {
+        keysWritable = binarySortableSerDe.serialize(singleRow, keyObjectInspector);
+      } catch (SerDeException e) {
+        throw new HiveException(e);
+      }
+
+      // Put a copy of the keys into the priority queue
+      if (!priorityQueue.contains(keysWritable)) {
+        priorityQueue.offer(WritableUtils.clone(keysWritable, getConfiguration()));
+      }
+
+      // Limit the queue size
+      if (priorityQueue.size() > conf.getTopN()) {
+        priorityQueue.poll();
+      }
+    }
+
+    // Filter rows with top n keys
+    int size = 0;
+    int[] selected = new int[batch.selected.length];
+    for (int i = 0; i < batch.size; i++) {
+
+      // Get keys
+      if (batch.selectedInUse) {
+        vectorExtractRow.extractRow(batch, batch.selected[i], singleRow);
+      } else {
+        vectorExtractRow.extractRow(batch, i, singleRow);
+      }
+
+      Writable keysWritable;
+      try {
+        keysWritable = binarySortableSerDe.serialize(singleRow, keyObjectInspector);
+      } catch (SerDeException e) {
+        throw new HiveException(e);
+      }
+
+      // Select the row if its keys are in the priority queue
+      if (priorityQueue.contains(keysWritable)) {
+        selected[size++] = i;
+      }
+    }
+
+    // Apply the selection to the batch
+    if (batch.size != size) {
+      batch.selectedInUse = true;
+      batch.selected = selected;
+      batch.size = size;
+    }
+
+    // Forward the result
+    if (size > 0) {
+      forward(batch, null, true);
+    }
+
+    // Restore the original selected vector
+    batch.selected = selectedBackup;
+    batch.size = sizeBackup;
+    batch.selectedInUse = selectedInUseBackup;
+  }
+
+  @Override
+  public String getName() {
+    return TopNKeyOperator.getOperatorName();
+  }
+
+  @Override
+  public OperatorType getType() {
+    return TOPNKEY;
+  }
+
+  @Override
+  public VectorizationContext getInputVectorizationContext() {
+    return vContext;
+  }
+
+  @Override
+  public VectorDesc getVectorDesc() {
+    return vectorDesc;
+  }
+
+  @Override
+  public boolean supportSkewJoinOptimization() {
+    return true;
+  }
+
+  @Override
+  public boolean columnNamesRowResolvedCanBeObtained() {
+    return true;
+  }
+
+  @Override
+  public boolean supportAutomaticSortMergeJoin() {
+    return true;
+  }
+
+  @Override
+  public boolean supportUnionRemoveOptimization() {
+    return true;
+  }
+
+  // Must send on to VectorPTFOperator...
+  @Override
+  public void setNextVectorBatchGroupStatus(boolean isLastGroupBatch) throws HiveException {
+    for (Operator<? extends OperatorDesc> op : childOperators) {
+      op.setNextVectorBatchGroupStatus(isLastGroupBatch);
+    }
+  }
+}
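Two ideas in the vectorized version are worth calling out: keys are compared through their BinarySortableSerDe byte images, so composite keys with mixed '+'/'-' directions order correctly under plain byte comparison, and the batch's selection state is saved and restored around forwarding so the shrunken batch never leaks back to the caller. A minimal sketch of that save/filter/restore pattern (keepRow is a stand-in for the real top-n membership test, and the class is mine, not the patch's):

```java
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Sketch of the save/filter/restore discipline on VectorizedRowBatch: stash the
// selection state, shrink the batch to the surviving rows, forward, then put the
// original selection back because upstream operators reuse the batch object.
public class SelectedVectorPattern {
  static void forwardFiltered(VectorizedRowBatch batch) {
    int[] selectedBackup = batch.selected;
    int sizeBackup = batch.size;
    boolean selectedInUseBackup = batch.selectedInUse;

    int size = 0;
    int[] selected = new int[batch.selected.length];
    for (int i = 0; i < batch.size; i++) {
      // Resolve the physical row index whether or not a selection is active.
      int row = batch.selectedInUse ? batch.selected[i] : i;
      if (keepRow(batch, row)) {
        selected[size++] = row; // record the surviving physical row index
      }
    }
    if (batch.size != size) {
      batch.selectedInUse = true;
      batch.selected = selected;
      batch.size = size;
    }
    // ... forward(batch) to child operators here ...

    batch.selected = selectedBackup;
    batch.size = sizeBackup;
    batch.selectedInUse = selectedInUseBackup;
  }

  static boolean keepRow(VectorizedRowBatch batch, int row) {
    return true; // placeholder for the "key is in the top-n priority queue" test
  }
}
```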
+ */ +package org.apache.hadoop.hive.ql.optimizer; + +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.TopNKeyDesc; + +import java.util.List; +import java.util.Stack; + +public class TopNKeyProcessor implements NodeProcessor { + + public TopNKeyProcessor() { + } + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + // Get RS + ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) nd; + ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf(); + + // It only allows an ordered and limited reduce sink + if (!reduceSinkDesc.isOrdering() || reduceSinkDesc.getTopN() < 0) { + return null; + } + + // Get columns and column names + List keyColumns = reduceSinkDesc.getKeyCols(); + + // Get the parent operator and the current operator + int depth = stack.size() - 1; + Operator parentOperator = + (Operator) stack.get(depth - 1); + Operator currentOperator = + (Operator) stack.get(depth); + + // Insert a new top n key operator between the parent operator and the current operator + TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(), + keyColumns); + makeOpBetween(topNKeyDesc, new RowSchema(reduceSinkOperator.getSchema()), parentOperator, + currentOperator); + + return null; + } + + private static void makeOpBetween(OperatorDesc newOperatorDesc, RowSchema rowSchema, + Operator parentOperator, + Operator currentOperator) { + + // PARENT -> (NEW, (CURRENT -> CHILD)) + Operator newOperator = OperatorFactory.getAndMakeChild( + currentOperator.getCompilationOpContext(), newOperatorDesc, rowSchema, + currentOperator.getParentOperators()); + + // PARENT -> NEW -> CURRENT -> CHILD + newOperator.getChildOperators().add(currentOperator); + currentOperator.getParentOperators().add(newOperator); + parentOperator.removeChild(currentOperator); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdown.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdown.java new file mode 100644 index 0000000000..aeb2e9ae58 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdown.java @@ -0,0 +1,261 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdown.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdown.java
new file mode 100644
index 0000000000..aeb2e9ae58
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyPushdown.java
@@ -0,0 +1,261 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer;
+
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.TopNKeyOperator;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import static org.apache.hadoop.hive.ql.plan.ExprNodeDesc.ExprNodeDescEqualityWrapper.transform;
+
+public class TopNKeyPushdown implements NodeProcessor {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TopNKeyPushdown.class);
+
+  private TopNKeyOperator operator;
+  private TopNKeyDesc desc;
+
+  public TopNKeyPushdown() {
+  }
+
+  @Override
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+      Object... nodeOutputs) throws SemanticException {
+
+    operator = (TopNKeyOperator) nd;
+    desc = operator.getConf();
+
+    int depth = stack.size() - 1;
+    int delta;
+    while ((delta = checkAndPushdown(stack, depth)) > 0) {
+      depth -= delta;
+    }
+
+    return null;
+  }
+
+  private List<ExprNodeDesc> mapColumns(Map<String, ExprNodeDesc> mapping,
+      List<ExprNodeDesc> cols) {
+    List<ExprNodeDesc> colsInOperator = new ArrayList<>();
+    for (ExprNodeDesc col : cols) {
+      String exprString = col.getExprString();
+      if (mapping.containsKey(exprString)) {
+        colsInOperator.add(mapping.get(exprString));
+      }
+    }
+    return colsInOperator;
+  }
+
+  private int checkAndPushdown(Stack<Node> stack, int depth) {
+
+    Operator<? extends OperatorDesc> parentOperator =
+        (Operator<? extends OperatorDesc>) stack.get(depth - 1);
+
+    if (parentOperator instanceof TableScanOperator) {
+      return 0;
+    }
+
+    if (parentOperator instanceof GroupByOperator) {
+      return handleGroupBy(parentOperator);
+    } else if (parentOperator instanceof SelectOperator) {
+      return handleSelect(parentOperator);
+    } else if (parentOperator instanceof FilterOperator) {
+      moveDown(parentOperator);
+      return 1;
+    } else if (parentOperator instanceof CommonJoinOperator) {
+      Operator<? extends OperatorDesc> grandParentOperator =
+          (Operator<? extends OperatorDesc>) stack.get(depth - 2);
+      Operator<? extends OperatorDesc> grandGrandParentOperator =
+          (Operator<? extends OperatorDesc>) stack.get(depth - 3);
+      return handleJoin(parentOperator, grandParentOperator, grandGrandParentOperator);
+    }
+    return 0;
+  }
+
+  private int handleJoin(Operator<? extends OperatorDesc> parentOperator,
+      Operator<? extends OperatorDesc> grandParentOperator,
+      Operator<? extends OperatorDesc> grandGrandParentOperator) {
+
+    CommonJoinOperator joinOperator = (CommonJoinOperator) parentOperator;
+    JoinDesc joinDesc = (JoinDesc) joinOperator.getConf();
+
+    // Supports inner joins only
+    if (!joinDesc.isNoOuterJoin()) {
+      return 0;
+    }
+
+    ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) grandParentOperator;
+    ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf();
+
+    List<ExprNodeDesc> mappedKeyColumns = mapColumns(reduceSinkDesc.getColumnExprMap(),
+        mapColumns(joinDesc.getColumnExprMap(), desc.getKeyColumns()));
+
+    // There must be mapped key columns
+    if (mappedKeyColumns.isEmpty()) {
+      return 0;
+    }
+
+    TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(desc.getTopN(),
+        mapOrder(desc.getColumnSortOrder(), reduceSinkDesc.getKeyCols(), mappedKeyColumns),
+        mappedKeyColumns);
+
+    // Avoid duplicates
+    if (grandGrandParentOperator instanceof TopNKeyOperator) {
+      TopNKeyOperator grandGrandParentTnkOp = (TopNKeyOperator) grandGrandParentOperator;
+      TopNKeyDesc grandGrandParentTnkDesc = grandGrandParentTnkOp.getConf();
+      if (grandGrandParentTnkDesc.isSame(newTopNKeyDesc)) {
+        return 0;
+      }
+    }
+
+    operator = copyDown(newTopNKeyDesc, reduceSinkOperator);
+    desc = operator.getConf();
+    return 2;
+  }
+
+  private int handleSelect(Operator<? extends OperatorDesc> parentOperator) {
+
+    SelectOperator selectOperator = (SelectOperator) parentOperator;
+    SelectDesc selectDesc = selectOperator.getConf();
+
+    List<ExprNodeDesc> mappedKeyColumns =
+        mapColumns(parentOperator.getColumnExprMap(), desc.getKeyColumns());
+    if (!transform(selectDesc.getColList()).containsAll(transform(mappedKeyColumns))) {
+      return 0;
+    }
+
+    desc.setColumnSortOrder(mapOrder(desc.getColumnSortOrder(),
+        selectDesc.getColList(), mappedKeyColumns));
+    desc.setKeyColumns(mappedKeyColumns);
+    operator.setSchema(new RowSchema(selectOperator.getSchema()));
+
+    moveDown(parentOperator);
+    return 1;
+  }
+
+  private int handleGroupBy(Operator<? extends OperatorDesc> parentOperator) {
+
+    GroupByOperator groupByOperator = (GroupByOperator) parentOperator;
+    GroupByDesc groupByDesc = groupByOperator.getConf();
+
+    List<ExprNodeDesc> mappedKeyColumns = mapColumns(parentOperator.getColumnExprMap(),
+        desc.getKeyColumns());
+    if (!transform(groupByDesc.getKeys()).containsAll(transform(mappedKeyColumns))) {
+      return 0;
+    }
+
+    desc.setColumnSortOrder(mapOrder(desc.getColumnSortOrder(),
+        groupByDesc.getKeys(), mappedKeyColumns));
+    desc.setKeyColumns(mappedKeyColumns);
+    operator.setSchema(new RowSchema(groupByOperator.getSchema()));
+
+    moveDown(parentOperator);
+    return 1;
+  }
+
+  private void moveDown(Operator<? extends OperatorDesc> baseOperator) {
+    LOG.debug("Move down through " + baseOperator);
+
+    // Before: PARENT ---> BASE ---> TNK ---> CHILD
+    // After:  PARENT -1-> TNK -2-> BASE -3-> CHILD
+
+    List<Operator<? extends OperatorDesc>> parentOperators = baseOperator.getParentOperators();
+    List<Operator<? extends OperatorDesc>> childOperators = operator.getChildOperators();
+
+    // PARENT -1-> TNK
+    for (Operator<? extends OperatorDesc> parentOperator : parentOperators) {
+      parentOperator.replaceChild(baseOperator, operator);
+    }
+    operator.setParentOperators(new ArrayList<>(parentOperators));
+
+    // TNK -2-> BASE
+    operator.setChildOperators(new ArrayList<>(Collections.singletonList(baseOperator)));
+    baseOperator.setParentOperators(new ArrayList<>(Collections.singletonList(operator)));
+
+    // BASE -3-> CHILD
+    baseOperator.setChildOperators(new ArrayList<>(childOperators));
+    for (Operator<? extends OperatorDesc> childOperator : childOperators) {
+      childOperator.replaceParent(operator, baseOperator);
+    }
+  }
+
+  private TopNKeyOperator copyDown(TopNKeyDesc newDesc,
+      Operator<? extends OperatorDesc> baseOperator) {
+
+    Operator<? extends OperatorDesc> parentOperator = baseOperator.getParentOperators().get(0);
+
+    LOG.debug("New top n key operator between " + baseOperator + " and " + parentOperator);
+
+    // Before: PARENT ------------> BASE
+    // After:  PARENT -1-> TNK -2-> BASE
+
+    Operator<? extends OperatorDesc> newOperator =
+        OperatorFactory.get(baseOperator.getCompilationOpContext(), newDesc,
+            parentOperator.getSchema());
+
+    // PARENT -1-> TNK
+    parentOperator.replaceChild(baseOperator, newOperator);
+    newOperator.setParentOperators(new ArrayList<>(Collections.singletonList(parentOperator)));
+
+    // TNK -2-> BASE
+    newOperator.setChildOperators(new ArrayList<>(Collections.singletonList(baseOperator)));
+    baseOperator.setParentOperators(new ArrayList<>(Collections.singletonList(newOperator)));
+
+    return (TopNKeyOperator) newOperator;
+  }
+
+  private static String mapOrder(String order, List<ExprNodeDesc> parentCols,
+      List<ExprNodeDesc> mappedCols) {
+
+    StringBuilder builder = new StringBuilder();
+    int index = 0;
+    for (ExprNodeDesc mappedCol : mappedCols) {
+      if (parentCols.contains(mappedCol)) {
+        builder.append(order.charAt(index++));
+      } else {
+        builder.append("+");
+      }
+    }
+    return builder.toString();
+  }
+}
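mapOrder deserves a close look: as key columns are rewritten into a parent operator's namespace, their '+'/'-' sort directions have to be carried along, and columns that fail to map fall back to ascending. The same logic with strings standing in for ExprNodeDesc (helper and class names are mine):

```java
import java.util.Arrays;
import java.util.List;

// Sketch of what mapOrder computes: for each key column that survived the
// remapping, carry over its '+'/'-' direction; unmatched columns default to '+'.
public class MapOrderSketch {
  static String mapOrder(String order, List<String> parentCols, List<String> mappedCols) {
    StringBuilder builder = new StringBuilder();
    int index = 0;
    for (String mappedCol : mappedCols) {
      if (parentCols.contains(mappedCol)) {
        builder.append(order.charAt(index++));
      } else {
        builder.append('+');
      }
    }
    return builder.toString();
  }

  public static void main(String[] args) {
    // keys (a DESC, b ASC) remapped; "c" has no counterpart among the parent's columns
    System.out.println(mapOrder("-+", Arrays.asList("a", "b"), Arrays.asList("a", "c"))); // "-+"
  }
}
```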
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 394f826508..1c1a486f16 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -121,6 +121,7 @@
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PTFDesc;
 import org.apache.hadoop.hive.ql.plan.SelectDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
 import org.apache.hadoop.hive.ql.plan.VectorAppMasterEventDesc;
 import org.apache.hadoop.hive.ql.plan.VectorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorFileSinkDesc;
@@ -133,6 +134,7 @@
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc.ProcessingMode;
 import org.apache.hadoop.hive.ql.plan.VectorSparkHashTableSinkDesc;
 import org.apache.hadoop.hive.ql.plan.VectorSparkPartitionPruningSinkDesc;
+import org.apache.hadoop.hive.ql.plan.VectorTopNKeyDesc;
 import org.apache.hadoop.hive.ql.plan.VectorLimitDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import org.apache.hadoop.hive.ql.plan.VectorSMBJoinDesc;
@@ -2492,6 +2494,10 @@ private boolean validateFilterOperator(FilterOperator op) {
         desc, "Predicate", VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true);
   }
 
+  private boolean validateTopNKeyOperator(TopNKeyOperator op) {
+    List<ExprNodeDesc> keyColumns = op.getConf().getKeyColumns();
+    return validateExprNodeDesc(keyColumns, "Key columns");
+  }
+
   private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce,
       boolean isTezOrSpark, VectorGroupByDesc vectorGroupByDesc) {
@@ -4092,6 +4098,20 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) {
         vContext, vectorFilterDesc);
   }
 
+  private static Operator<? extends OperatorDesc> vectorizeTopNKeyOperator(
+      Operator<? extends OperatorDesc> topNKeyOperator, VectorizationContext vContext,
+      VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException {
+
+    TopNKeyDesc topNKeyDesc = (TopNKeyDesc) topNKeyOperator.getConf();
+
+    List<ExprNodeDesc> keyColumns = topNKeyDesc.getKeyColumns();
+    VectorExpression[] keyExpressions = vContext.getVectorExpressions(keyColumns);
+    vectorTopNKeyDesc.setKeyExpressions(keyExpressions);
+    return OperatorFactory.getVectorOperator(
+        topNKeyOperator.getCompilationOpContext(), topNKeyDesc,
+        vContext, vectorTopNKeyDesc);
+  }
+
   private static Class<? extends VectorAggregateExpression> findVecAggrClass(
       Class<? extends VectorAggregateExpression>[] vecAggrClasses,
       String aggregateName, ColumnVector.Type inputColVectorType,
@@ -4882,6 +4902,23 @@ private static VectorPTFInfo createVectorPTFInfo(Operator inputs,
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
     // Create the context for the walker
     OptimizeTezProcContext procCtx = new OptimizeTezProcContext(conf, pCtx, inputs, outputs);
 
+    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+    runTopNKeyOptimization(procCtx);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run top n key optimization");
+
+    perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
+    runTopNKeyPushdown(procCtx);
+    perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run top n key pushdown");
+
     perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER);
     // setup dynamic partition pruning where possible
     runDynamicPartitionPruning(procCtx, inputs, outputs);
@@ -1006,6 +1018,45 @@ private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx)
     ogw.startWalking(topNodes, null);
   }
 
+  private static void runTopNKeyOptimization(OptimizeTezProcContext procCtx)
+      throws SemanticException {
+    if (!procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_TOPNKEY)) {
+      return;
+    }
+
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(
+        new RuleRegExp("Top n key optimization", ReduceSinkOperator.getOperatorName() + "%"),
+        new TopNKeyProcessor());
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(procCtx.parseContext.getTopOps().values());
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+    ogw.startWalking(topNodes, null);
+  }
+
+  private static void runTopNKeyPushdown(OptimizeTezProcContext procCtx) throws SemanticException {
+    if (!procCtx.conf.getBoolVar(ConfVars.HIVE_OPTIMIZE_TOPNKEY)) {
+      return;
+    }
+
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(
+        new RuleRegExp("Top n key pushdown", TopNKeyOperator.getOperatorName() + "%"),
+        new TopNKeyPushdown());
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
+    List<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(procCtx.parseContext.getTopOps().values());
+    GraphWalker ogw = new PreOrderOnceWalker(disp);
+    ogw.startWalking(topNodes, null);
+  }
+
   private boolean findParallelSemiJoinBranch(Operator<?> mapjoin, TableScanOperator bigTableTS,
       ParseContext parseContext,
       Map<ReduceSinkOperator, TableScanOperator> semijoins) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
new file mode 100644
index 0000000000..e7ae9f3c43
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TopNKeyDesc.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.plan;
+
+import org.apache.hadoop.hive.ql.plan.Explain.Level;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * TopNKeyDesc.
+ *
+ */
+@Explain(displayName = "Top N Key Operator", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+public class TopNKeyDesc extends AbstractOperatorDesc {
+
+  private static final long serialVersionUID = 1L;
+
+  private int topN;
+  private String columnSortOrder;
+  private List<ExprNodeDesc> keyColumns;
+
+  public TopNKeyDesc() {
+  }
+
+  public TopNKeyDesc(
+      final int topN,
+      final String columnSortOrder,
+      final List<ExprNodeDesc> keyColumns) {
+
+    this.topN = topN;
+    this.columnSortOrder = columnSortOrder;
+    this.keyColumns = keyColumns;
+  }
+
+  @Explain(displayName = "top n", explainLevels = { Level.DEFAULT, Level.EXTENDED, Level.USER })
+  public int getTopN() {
+    return topN;
+  }
+
+  public void setTopN(int topN) {
+    this.topN = topN;
+  }
+
+  @Explain(displayName = "sort order", explainLevels = { Level.DEFAULT, Level.EXTENDED, Level.USER })
+  public String getColumnSortOrder() {
+    return columnSortOrder;
+  }
+
+  public void setColumnSortOrder(String columnSortOrder) {
+    this.columnSortOrder = columnSortOrder;
+  }
+
+  @Explain(displayName = "keys")
+  public String getKeyString() {
+    return PlanUtils.getExprListString(keyColumns);
+  }
+
+  @Explain(displayName = "keys", explainLevels = { Level.USER })
+  public String getUserLevelExplainKeyString() {
+    return PlanUtils.getExprListString(keyColumns, true);
+  }
+
+  public List<ExprNodeDesc> getKeyColumns() {
+    return keyColumns;
+  }
+
+  public void setKeyColumns(List<ExprNodeDesc> keyColumns) {
+    this.keyColumns = keyColumns;
+  }
+
+  public List<String> getKeyColumnNames() {
+    List<String> ret = new ArrayList<>();
+    for (ExprNodeDesc keyColumn : keyColumns) {
+      ret.add(keyColumn.getExprString());
+    }
+    return ret;
+  }
+
+  @Override
+  public boolean isSame(OperatorDesc other) {
+    if (getClass().getName().equals(other.getClass().getName())) {
+      TopNKeyDesc otherDesc = (TopNKeyDesc) other;
+      return getTopN() == otherDesc.getTopN() &&
+          Objects.equals(columnSortOrder, otherDesc.columnSortOrder) &&
+          Objects.equals(keyColumns, otherDesc.keyColumns);
+    }
+    return false;
+  }
+
+  @Override
+  public Object clone() {
+    TopNKeyDesc ret = new TopNKeyDesc();
+    ret.setTopN(topN);
+    ret.setColumnSortOrder(columnSortOrder);
+    ret.setKeyColumns(getKeyColumns() == null ? null : new ArrayList<>(getKeyColumns()));
+    return ret;
+  }
+
+  public class TopNKeyDescExplainVectorization extends OperatorExplainVectorization {
+
+    private final TopNKeyDesc topNKeyDesc;
+    private final VectorTopNKeyDesc vectorTopNKeyDesc;
+
+    public TopNKeyDescExplainVectorization(TopNKeyDesc topNKeyDesc, VectorTopNKeyDesc vectorTopNKeyDesc) {
+      super(vectorTopNKeyDesc, true);
+      this.topNKeyDesc = topNKeyDesc;
+      this.vectorTopNKeyDesc = vectorTopNKeyDesc;
+    }
+
+    @Explain(vectorization = Explain.Vectorization.OPERATOR, displayName = "keyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+    public List<String> getKeyExpressions() {
+      return vectorExpressionsToStringList(vectorTopNKeyDesc.getKeyExpressions());
+    }
+  }
+
+  @Explain(vectorization = Explain.Vectorization.OPERATOR, displayName = "Top N Key Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED })
+  public TopNKeyDescExplainVectorization getTopNKeyVectorization() {
+    VectorTopNKeyDesc vectorTopNKeyDesc = (VectorTopNKeyDesc) getVectorDesc();
+    if (vectorTopNKeyDesc == null) {
+      return null;
+    }
+    return new TopNKeyDescExplainVectorization(this, vectorTopNKeyDesc);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTopNKeyDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTopNKeyDesc.java
new file mode 100644
index 0000000000..9a266a0c57
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorTopNKeyDesc.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+
+public class VectorTopNKeyDesc extends AbstractVectorDesc {
+
+  private static final long serialVersionUID = 1L;
+
+  private VectorExpression[] keyExpressions;
+
+  public VectorTopNKeyDesc() {
+  }
+
+  public VectorExpression[] getKeyExpressions() {
+    return keyExpressions;
+  }
+
+  public void setKeyExpressions(VectorExpression[] keyExpressions) {
+    this.keyExpressions = keyExpressions;
+  }
+}
diff --git ql/src/test/queries/clientpositive/topnkey.q ql/src/test/queries/clientpositive/topnkey.q
new file mode 100644
index 0000000000..b17ede6b0d
--- /dev/null
+++ ql/src/test/queries/clientpositive/topnkey.q
@@ -0,0 +1,194 @@
+--!
qt:dataset:src +set hive.mapred.mode=nonstrict; +set hive.vectorized.execution.enabled=false; +set hive.optimize.topnkey=true; + +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.tez.dynamic.partition.pruning=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; +set hive.tez.min.bloom.filter.entries=1; + +set hive.tez.dynamic.partition.pruning=true; +set hive.stats.fetch.column.stats=true; +set hive.cbo.enable=true; + +EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; + +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; + +EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; + +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; + +explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; + +create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc; + +create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc; + +create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +stored as orc; + +create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc; + +create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, 
+ i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +stored as orc; + +explain analyze +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100; + +drop table store_sales; +drop table customer_demographics; +drop table date_dim; +drop table store; +drop table item; diff --git ql/src/test/queries/clientpositive/vector_topnkey.q ql/src/test/queries/clientpositive/vector_topnkey.q new file mode 100644 index 0000000000..a9b281f529 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_topnkey.q @@ -0,0 +1,194 @@ +--! qt:dataset:src +set hive.mapred.mode=nonstrict; +set hive.vectorized.execution.enabled=true; +set hive.optimize.topnkey=true; + +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.tez.dynamic.partition.pruning=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; +set hive.tez.min.bloom.filter.entries=1; + +set hive.tez.dynamic.partition.pruning=true; +set hive.stats.fetch.column.stats=true; +set hive.cbo.enable=true; + +explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; + +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; + +explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; + +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; + +explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; + +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; + +create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc; + +create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc; + +create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name 
string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +stored as orc; + +create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc; + +create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +stored as orc; + +explain analyze +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100; + +drop table store_sales; +drop table customer_demographics; +drop table date_dim; +drop table store; +drop table item; diff --git ql/src/test/results/clientpositive/llap/topnkey.q.out ql/src/test/results/clientpositive/llap/topnkey.q.out new file mode 100644 index 0000000000..68fe485610 --- /dev/null +++ ql/src/test/results/clientpositive/llap/topnkey.q.out @@ -0,0 +1,1055 @@ +PREHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Top N Key Operator + sort order: + + keys: KEY._col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: 
COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Top N Key Operator + sort order: + + keys: KEY._col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: 
string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: _col0 (type: string) + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here 
#### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_demographics +POSTHOOK: query: create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_demographics +PREHOOK: query: create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_dim +POSTHOOK: query: create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + 
d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_dim +PREHOOK: query: create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store +POSTHOOK: query: create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store +PREHOOK: query: create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@item +POSTHOOK: query: create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@item +PREHOOK: query: select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + 
cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +PREHOOK: query: explain analyze +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: ss_cdemo_sk (type: bigint), 
ss_item_sk (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + top n: 100 + Filter Operator + predicate: (ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_item_sk (type: bigint), ss_cdemo_sk (type: bigint), ss_store_sk (type: bigint), ss_quantity (type: int), ss_list_price (type: double), ss_sales_price (type: double), ss_coupon_amt (type: double), ss_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col7 (type: bigint) + sort order: + + Map-reduce partition columns: _col7 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double) + Execution mode: llap + LLAP IO: unknown + Map 10 + Map Operator Tree: + TableScan + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: i_item_id (type: string) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + top n: 100 + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Map 11 + Map Operator Tree: + TableScan + alias: customer_demographics + filterExpr: ((cd_gender = 'M') and (cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 8 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + 
Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 9 + Map Operator Tree: + TableScan + alias: store + filterExpr: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: + + keys: s_state (type: string) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + top n: 100 + Filter Operator + predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: bigint), s_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: _col1 (type: bigint), _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + top n: 100 + Reduce Output Operator + key expressions: _col2 (type: bigint) + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col11 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: _col1 (type: bigint), _col11 (type: string) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + top n: 100 + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col11 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 
_col0 (type: bigint) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col11, _col13 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: _col13 (type: string), _col11 (type: string) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + top n: 100 + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col11 (type: string), _col13 (type: string) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col3, _col4, _col5, _col6, _col11, _col13 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Top N Key Operator + sort order: ++ + keys: _col13 (type: string), _col11 (type: string) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + top n: 100 + Select Operator + expressions: _col13 (type: string), _col11 (type: string), _col3 (type: int), _col4 (type: double), _col6 (type: double), _col5 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4), sum(_col5), count(_col5) + keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Top N Key Operator + sort order: ++ + keys: KEY._col0 (type: string), KEY._col1 (type: string) + Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + top n: 100 + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), grouping(_col2, 0) (type: bigint), (_col3 / _col4) (type: double), (_col5 / _col6) (type: double), (_col7 / _col8) (type: double), (_col9 / _col10) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1/1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: drop table store_sales +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@store_sales +PREHOOK: Output: default@store_sales +POSTHOOK: query: drop table store_sales +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: default@store_sales +PREHOOK: query: drop table customer_demographics +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@customer_demographics +PREHOOK: Output: default@customer_demographics +POSTHOOK: query: drop table customer_demographics +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Output: default@customer_demographics +PREHOOK: query: drop table date_dim +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@date_dim +PREHOOK: Output: default@date_dim +POSTHOOK: query: drop table date_dim +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: default@date_dim +PREHOOK: query: drop table store +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@store +PREHOOK: Output: default@store +POSTHOOK: query: drop table store +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@store +POSTHOOK: Output: default@store +PREHOOK: query: drop table item +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@item +PREHOOK: Output: default@item +POSTHOOK: query: drop table item +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@item +POSTHOOK: Output: default@item diff --git ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ql/src/test/results/clientpositive/llap/vector_topnkey.q.out new file mode 100644 index 0000000000..e2b72c820f --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_topnkey.q.out @@ -0,0 +1,1349 @@ +PREHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + 
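(Reviewer sketch.) The vectorized plans that follow show the same Top N Key Operator placement as the row-mode plans above: "top n: 5" above the scans and again on the reduce side before the merge-partial group-by. A minimal sketch of the idea the operator implements — retain the n smallest distinct keys seen so far and forward a row only while its key is still among them — assuming nothing about the actual TopNKeyOperator internals:

    import java.util.Comparator;
    import java.util.TreeSet;

    // Illustration only: a row-mode top-n key filter. The real
    // TopNKeyOperator in this patch keys off KeyWrapper comparisons;
    // this sketch uses a plain TreeSet of distinct keys instead.
    public final class TopNKeyFilter<K> {
      private final int topN;
      private final TreeSet<K> smallestKeys; // distinct keys, ascending

      public TopNKeyFilter(int topN, Comparator<K> order) {
        this.topN = topN;
        this.smallestKeys = new TreeSet<>(order);
      }

      // True if a row carrying this key must be forwarded downstream;
      // false if it can never appear among the first topN ordered keys.
      public boolean canForward(K key) {
        smallestKeys.add(key);           // no-op for an already-seen key
        if (smallestKeys.size() > topN) {
          smallestKeys.pollLast();       // evict the largest retained key
        }
        return smallestKeys.contains(key);
      }
    }

Note that ties with the n-th key are forwarded, not dropped; that matters when a GROUP BY follows, since every row of the n-th key's group must survive for its aggregate to be correct.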
+STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true + Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:string, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Top N Key Operator + sort order: + + keys: KEY._col0 (type: string) + Statistics: 
Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: 
true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Top N Key Operator + sort order: + + keys: KEY._col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true 
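(Reviewer sketch.) The "native: true" lines above indicate the vectorized variant filters whole batches rather than single rows. One plausible shape for that, reusing the hypothetical TopNKeyFilter sketch from earlier and ignoring nulls and isRepeating for brevity — not the actual VectorTopNKeyOperator code:

    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    final class BatchTopNKeySketch {
      // Drops non-qualifying rows from a batch by rewriting the
      // selected-row index array in place, the usual way vectorized
      // operators filter without copying column data.
      static void filterBatch(VectorizedRowBatch batch, TopNKeyFilter<String> filter) {
        int newSize = 0;
        for (int i = 0; i < batch.size; i++) {
          int row = batch.selectedInUse ? batch.selected[i] : i;
          if (filter.canForward(keyOf(batch, row))) {
            batch.selected[newSize++] = row;   // row survives
          }
        }
        batch.size = newSize;        // surviving row count
        batch.selectedInUse = true;  // downstream operators honor 'selected'
      }

      // Hypothetical key extraction for the single string key of these
      // plans ("keyExpressions: col 0:string"); the real operator
      // evaluates its key expressions against the column vectors.
      private static String keyOf(VectorizedRowBatch batch, int row) {
        BytesColumnVector key = (BytesColumnVector) batch.cols[0];
        return new String(key.vector[row], key.start[row], key.length[row],
            StandardCharsets.UTF_8);
      }
    }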
+ Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### 
+ Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Top N Key Operator + sort order: + + keys: key (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:string + native: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:string) + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + 
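(Reviewer sketch.) In the join plans here the operator sits on both scan sides with "top n: 5" on the string key column. A tiny check of the earlier hypothetical TopNKeyFilter shows the two behaviors these plans depend on: rows sorting after the n-th key are dropped, and duplicates of a retained key pass through — which is why the join result below can legitimately be five "0 val_0" rows:

    import java.util.Comparator;

    public class TopNKeyFilterDemo {
      public static void main(String[] args) {
        // Keys are strings in these plans, so ordering is lexicographic:
        // "0" < "10" < "100".
        TopNKeyFilter<String> filter = new TopNKeyFilter<>(2, Comparator.naturalOrder());
        System.out.println(filter.canForward("10"));   // true: fewer than 2 keys kept
        System.out.println(filter.canForward("0"));    // true: kept keys now {"0","10"}
        System.out.println(filter.canForward("100"));  // false: sorts after both kept keys
        System.out.println(filter.canForward("0"));    // true: duplicate of a kept key
      }
    }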
Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumnNums: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [1] + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: + + keys: _col0 (type: string) + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Limit Vectorization: + className: VectorLimitOperator + native: true + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_demographics +POSTHOOK: query: create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_demographics +PREHOOK: query: create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, 
+ d_current_quarter string, + d_current_year string +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_dim +POSTHOOK: query: create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_dim +PREHOOK: query: create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store +POSTHOOK: query: create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store +PREHOOK: query: create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@item +POSTHOOK: query: create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, 
+ i_units string,
+ i_container string,
+ i_manager_id int,
+ i_product_name string
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@item
+PREHOOK: query: select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_demographics
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@item
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+POSTHOOK: query: select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_demographics
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@item
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+#### A masked pattern was here ####
+PREHOOK: query: explain analyze
+select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze
+select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+ Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+ Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: store_sales
+ filterExpr: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Top N Key Operator
+ sort order: ++
+ keys: ss_cdemo_sk (type: bigint), ss_item_sk (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ top n: 100
+ Filter Operator
+ predicate: (ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: ss_item_sk (type: bigint), ss_cdemo_sk (type: bigint), ss_store_sk (type: bigint), ss_quantity (type: int), ss_list_price (type: double), ss_sales_price (type: double), ss_coupon_amt (type: double), ss_sold_date_sk (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col7 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col7 (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double)
+ Execution mode: vectorized, llap
+ LLAP IO: unknown
+ Map 10
+ Map Operator Tree:
+ TableScan
+ alias: item
+ filterExpr: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Top N Key Operator
+ sort order: +
+ keys: i_item_id (type: string)
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ top n: 100
+ Filter Operator
+ predicate: i_item_sk is not null (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: i_item_sk (type: bigint), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 11
+ Map Operator Tree:
+ TableScan
+ alias: customer_demographics
+ filterExpr: ((cd_gender = 'M') and (cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null) (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: cd_demo_sk (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: d_date_sk (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: store
+ filterExpr: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Top N Key Operator
+ sort order: +
+ keys: s_state (type: string)
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ top n: 100
+ Filter Operator
+ predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean)
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: s_store_sk (type: bigint), s_state (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col7 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Top N Key Operator
+ sort order: ++
+ keys: _col1 (type: bigint), _col0 (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ top n: 100
+ Reduce Output Operator
+ key expressions: _col2 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col2 (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col2 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col11
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Top N Key Operator
+ sort order: ++
+ keys: _col1 (type: bigint), _col11 (type: string)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ top n: 100
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col11 (type: string)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col11, _col13
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Top N Key Operator
+ sort order: ++
+ keys: _col13 (type: string), _col11 (type: string)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ top n: 100
+ Reduce Output Operator
+ key expressions: _col1 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: bigint)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col11 (type: string), _col13 (type: string)
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: bigint)
+ 1 _col0 (type: bigint)
+ outputColumnNames: _col3, _col4, _col5, _col6, _col11, _col13
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Top N Key Operator
+ sort order: ++
+ keys: _col13 (type: string), _col11 (type: string)
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ top n: 100
+ Select Operator
+ expressions: _col13 (type: string), _col11 (type: string), _col3 (type: int), _col4 (type: double), _col6 (type: double), _col5 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4), sum(_col5), count(_col5)
+ keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ sort order: +++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
+ Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint)
+ Reducer 6
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Top N Key Operator
+ sort order: ++
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+ top n: 100
+ Group By Operator
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+ Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), grouping(_col2, 0) (type: bigint), (_col3 / _col4) (type: double), (_col5 / _col6) (type: double), (_col7 / _col8) (type: double), (_col9 / _col10) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ sort order: ++
+ Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double)
+ Reducer 7
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 100
+ Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: 100
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: drop table store_sales
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: drop table store_sales
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: default@store_sales
+PREHOOK: query: drop table customer_demographics
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@customer_demographics
+PREHOOK: Output: default@customer_demographics
+POSTHOOK: query: drop table customer_demographics
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@customer_demographics
+POSTHOOK: Output: default@customer_demographics
+PREHOOK: query: drop table date_dim
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@date_dim
+PREHOOK: Output: default@date_dim
+POSTHOOK: query: drop table date_dim
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Output: default@date_dim
+PREHOOK: query: drop table store
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store
+PREHOOK: Output: default@store
+POSTHOOK: query: drop table store
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store
+POSTHOOK: Output: default@store
+PREHOOK: query: drop table item
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@item
+PREHOOK: Output: default@item
+POSTHOOK: query: drop table item
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@item
+POSTHOOK: Output: default@item
diff --git ql/src/test/results/clientpositive/tez/topnkey.q.out ql/src/test/results/clientpositive/tez/topnkey.q.out
new file mode 100644
index 0000000000..f9e5121757
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/topnkey.q.out
@@ -0,0 +1,714 @@
+PREHOOK: query: EXPLAIN
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:5
+ Stage-1
+ Reducer 3
+ File Output Operator [FS_10]
+ Limit [LIM_9] (rows=5 width=95)
+ Number of rows:5
+ Select Operator [SEL_8] (rows=250 width=95)
+ Output:["_col0","_col1"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_7]
+ Group By Operator [GBY_5] (rows=250 width=95)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ Top N Key Operator [TNK_11] (rows=250 width=95)
+ keys:KEY._col0,sort order:+,top n:5
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_4]
+ PartitionCols:_col0
+ Group By Operator [GBY_3] (rows=250 width=95)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Select Operator [SEL_1] (rows=500 width=178)
+ Output:["_col0","_col1"]
+ Top N Key Operator [TNK_12] (rows=500 width=178)
+ keys:key,sort order:+,top n:5
+ TableScan [TS_0] (rows=500 width=178)
+ default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0 0
+10 10
+100 200
+103 206
+104 208
+PREHOOK: query: EXPLAIN
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:5
+ Stage-1
+ Reducer 3
+ File Output Operator [FS_9]
+ Limit [LIM_8] (rows=5 width=87)
+ Number of rows:5
+ Select Operator [SEL_7] (rows=250 width=87)
+ Output:["_col0"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_6]
+ Group By Operator [GBY_4] (rows=250 width=87)
+ Output:["_col0"],keys:KEY._col0
+ Top N Key Operator [TNK_10] (rows=250 width=87)
+ keys:KEY._col0,sort order:+,top n:5
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_3]
+ PartitionCols:_col0
+ Group By Operator [GBY_2] (rows=250 width=87)
+ Output:["_col0"],keys:key
+ Select Operator [SEL_1] (rows=500 width=87)
+ Output:["key"]
+ Top N Key Operator [TNK_11] (rows=500 width=87)
+ keys:key,sort order:+,top n:5
+ TableScan [TS_0] (rows=500 width=87)
+ default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+
+PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+10
+100
+103
+104
+PREHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:5
+ Stage-1
+ Reducer 3
+ File Output Operator [FS_13]
+ Limit [LIM_12] (rows=5 width=178)
+ Number of rows:5
+ Select Operator [SEL_11] (rows=809 width=178)
+ Output:["_col0","_col1"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_10]
+ Select Operator [SEL_9] (rows=809 width=178)
+ Output:["_col0","_col1"]
+ Top N Key Operator [TNK_18] (rows=809 width=178)
+ keys:_col0,sort order:+,top n:5
+ Merge Join Operator [MERGEJOIN_21] (rows=809 width=178)
+ Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_6]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=500 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_16] (rows=500 width=87)
+ predicate:key is not null
+ Top N Key Operator [TNK_19] (rows=500 width=87)
+ keys:key,sort order:+,top n:5
+ TableScan [TS_0] (rows=500 width=87)
+ default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ <-Map 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_7]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=500 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_17] (rows=500 width=178)
+ predicate:key is not null
+ Top N Key Operator [TNK_20] (rows=500 width=178)
+ keys:key,sort order:+,top n:5
+ TableScan [TS_3] (rows=500 width=178)
+ default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+PREHOOK: query: create table store_sales
+(
+ ss_sold_time_sk bigint,
+ ss_item_sk bigint,
+ ss_customer_sk bigint,
+ ss_cdemo_sk bigint,
+ ss_hdemo_sk bigint,
+ ss_addr_sk bigint,
+ ss_store_sk bigint,
+ ss_promo_sk bigint,
+ ss_ticket_number bigint,
+ ss_quantity int,
+ ss_wholesale_cost double,
+ ss_list_price double,
+ ss_sales_price double,
+ ss_ext_discount_amt double,
+ ss_ext_sales_price double,
+ ss_ext_wholesale_cost double,
+ ss_ext_list_price double,
+ ss_ext_tax double,
+ ss_coupon_amt double,
+ ss_net_paid double,
+ ss_net_paid_inc_tax double,
+ ss_net_profit double
+)
+partitioned by (ss_sold_date_sk bigint)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: create table store_sales
+(
+ ss_sold_time_sk bigint,
+ ss_item_sk bigint,
+ ss_customer_sk bigint,
+ ss_cdemo_sk bigint,
+ ss_hdemo_sk bigint,
+ ss_addr_sk bigint,
+ ss_store_sk bigint,
+ ss_promo_sk bigint,
+ ss_ticket_number bigint,
+ ss_quantity int,
+ ss_wholesale_cost double,
+ ss_list_price double,
+ ss_sales_price double,
+ ss_ext_discount_amt double,
+ ss_ext_sales_price double,
+ ss_ext_wholesale_cost double,
+ ss_ext_list_price double,
+ ss_ext_tax double,
+ ss_coupon_amt double,
+ ss_net_paid double,
+ ss_net_paid_inc_tax double,
+ ss_net_profit double
+)
+partitioned by (ss_sold_date_sk bigint)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store_sales
+PREHOOK: query: create table customer_demographics
+(
+ cd_demo_sk bigint,
+ cd_gender string,
+ cd_marital_status string,
+ cd_education_status string,
+ cd_purchase_estimate int,
+ cd_credit_rating string,
+ cd_dep_count int,
+ cd_dep_employed_count int,
+ cd_dep_college_count int
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@customer_demographics
+POSTHOOK: query: create table customer_demographics
+(
+ cd_demo_sk bigint,
+ cd_gender string,
+ cd_marital_status string,
+ cd_education_status string,
+ cd_purchase_estimate int,
+ cd_credit_rating string,
+ cd_dep_count int,
+ cd_dep_employed_count int,
+ cd_dep_college_count int
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@customer_demographics
+PREHOOK: query: create table date_dim
+(
+ d_date_sk bigint,
+ d_date_id string,
+ d_date string,
+ d_month_seq int,
+ d_week_seq int,
+ d_quarter_seq int,
+ d_year int,
+ d_dow int,
+ d_moy int,
+ d_dom int,
+ d_qoy int,
+ d_fy_year int,
+ d_fy_quarter_seq int,
+ d_fy_week_seq int,
+ d_day_name string,
+ d_quarter_name string,
+ d_holiday string,
+ d_weekend string,
+ d_following_holiday string,
+ d_first_dom int,
+ d_last_dom int,
+ d_same_day_ly int,
+ d_same_day_lq int,
+ d_current_day string,
+ d_current_week string,
+ d_current_month string,
+ d_current_quarter string,
+ d_current_year string
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@date_dim
+POSTHOOK: query: create table date_dim
+(
+ d_date_sk bigint,
+ d_date_id string,
+ d_date string,
+ d_month_seq int,
+ d_week_seq int,
+ d_quarter_seq int,
+ d_year int,
+ d_dow int,
+ d_moy int,
+ d_dom int,
+ d_qoy int,
+ d_fy_year int,
+ d_fy_quarter_seq int,
+ d_fy_week_seq int,
+ d_day_name string,
+ d_quarter_name string,
+ d_holiday string,
+ d_weekend string,
+ d_following_holiday string,
+ d_first_dom int,
+ d_last_dom int,
+ d_same_day_ly int,
+ d_same_day_lq int,
+ d_current_day string,
+ d_current_week string,
+ d_current_month string,
+ d_current_quarter string,
+ d_current_year string
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@date_dim
+PREHOOK: query: create table store
+(
+ s_store_sk bigint,
+ s_store_id string,
+ s_rec_start_date string,
+ s_rec_end_date string,
+ s_closed_date_sk bigint,
+ s_store_name string,
+ s_number_employees int,
+ s_floor_space int,
+ s_hours string,
+ s_manager string,
+ s_market_id int,
+ s_geography_class string,
+ s_market_desc string,
+ s_market_manager string,
+ s_division_id int,
+ s_division_name string,
+ s_company_id int,
+ s_company_name string,
+ s_street_number string,
+ s_street_name string,
+ s_street_type string,
+ s_suite_number string,
+ s_city string,
+ s_county string,
+ s_state string,
+ s_zip string,
+ s_country string,
+ s_gmt_offset double,
+ s_tax_precentage double
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store
+POSTHOOK: query: create table store
+(
+ s_store_sk bigint,
+ s_store_id string,
+ s_rec_start_date string,
+ s_rec_end_date string,
+ s_closed_date_sk bigint,
+ s_store_name string,
+ s_number_employees int,
+ s_floor_space int,
+ s_hours string,
+ s_manager string,
+ s_market_id int,
+ s_geography_class string,
+ s_market_desc string,
+ s_market_manager string,
+ s_division_id int,
+ s_division_name string,
+ s_company_id int,
+ s_company_name string,
+ s_street_number string,
+ s_street_name string,
+ s_street_type string,
+ s_suite_number string,
+ s_city string,
+ s_county string,
+ s_state string,
+ s_zip string,
+ s_country string,
+ s_gmt_offset double,
+ s_tax_precentage double
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store
+PREHOOK: query: create table item
+(
+ i_item_sk bigint,
+ i_item_id string,
+ i_rec_start_date string,
+ i_rec_end_date string,
+ i_item_desc string,
+ i_current_price double,
+ i_wholesale_cost double,
+ i_brand_id int,
+ i_brand string,
+ i_class_id int,
+ i_class string,
+ i_category_id int,
+ i_category string,
+ i_manufact_id int,
+ i_manufact string,
+ i_size string,
+ i_formulation string,
+ i_color string,
+ i_units string,
+ i_container string,
+ i_manager_id int,
+ i_product_name string
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@item
+POSTHOOK: query: create table item
+(
+ i_item_sk bigint,
+ i_item_id string,
+ i_rec_start_date string,
+ i_rec_end_date string,
+ i_item_desc string,
+ i_current_price double,
+ i_wholesale_cost double,
+ i_brand_id int,
+ i_brand string,
+ i_class_id int,
+ i_class string,
+ i_category_id int,
+ i_category string,
+ i_manufact_id int,
+ i_manufact string,
+ i_size string,
+ i_formulation string,
+ i_color string,
+ i_units string,
+ i_container string,
+ i_manager_id int,
+ i_product_name string
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@item
+PREHOOK: query: select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_demographics
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@item
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_demographics
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@item
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PREHOOK: query: explain analyze
+select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze
+select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 7
+ File Output Operator [FS_36]
+ Limit [LIM_35] (rows=1/1 width=8)
+ Number of rows:100
+ Select Operator [SEL_34] (rows=1/1 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_33]
+ Select Operator [SEL_32] (rows=1/1 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ Group By Operator [GBY_31] (rows=1/1 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)","avg(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2
+ Top N Key Operator [TNK_56] (rows=3/1 width=8)
+ keys:KEY._col0, KEY._col1,sort order:++,top n:100
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col0, _col1, _col2
+ Group By Operator [GBY_29] (rows=3/1 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["avg(_col2)","avg(_col3)","avg(_col4)","avg(_col5)"],keys:_col0, _col1, 0L
+ Select Operator [SEL_27] (rows=1/0 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+ Top N Key Operator [TNK_57] (rows=1/0 width=8)
+ keys:_col13, _col11,sort order:++,top n:100
+ Merge Join Operator [MERGEJOIN_72] (rows=1/0 width=8)
+ Conds:RS_24._col1=RS_25._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col11","_col13"]
+ <-Map 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_25]
+ PartitionCols:_col0
+ Select Operator [SEL_14] (rows=1/0 width=560)
+ Output:["_col0"]
+ Filter Operator [FIL_55] (rows=1/0 width=560)
+ predicate:((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null)
+ TableScan [TS_12] (rows=1/0 width=560)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_24]
+ PartitionCols:_col1
+ Top N Key Operator [TNK_58] (rows=1/0 width=8)
+ keys:_col13, _col11,sort order:++,top n:100
+ Merge Join Operator [MERGEJOIN_71] (rows=1/0 width=8)
+ Conds:RS_21._col0=RS_22._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col11","_col13"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_22]
+ PartitionCols:_col0
+ Select Operator [SEL_11] (rows=1/0 width=192)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_54] (rows=1/0 width=192)
+ predicate:i_item_sk is not null
+ Top N Key Operator [TNK_63] (rows=1/0 width=192)
+ keys:i_item_id,sort order:+,top n:100
+ TableScan [TS_9] (rows=1/0 width=192)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col0
+ Top N Key Operator [TNK_59] (rows=1/0 width=8)
+ keys:_col1, _col11,sort order:++,top n:100
+ Merge Join Operator [MERGEJOIN_70] (rows=1/0 width=8)
+ Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col11"]
+ <-Map 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_19]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=1/0 width=192)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_53] (rows=1/0 width=192)
+ predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null)
+ Top N Key Operator [TNK_62] (rows=1/0 width=192)
+ keys:s_state,sort order:+,top n:100
+ TableScan [TS_6] (rows=1/0 width=192)
+ default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col2
+ Top N Key Operator [TNK_60] (rows=1/0 width=8)
+ keys:_col1, _col0,sort order:++,top n:100
+ Merge Join Operator [MERGEJOIN_69] (rows=1/0 width=8)
+ Conds:RS_15._col7=RS_16._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_15]
+ PartitionCols:_col7
+ Select Operator [SEL_2] (rows=1/0 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+ Filter Operator [FIL_51] (rows=1/0 width=8)
+ predicate:(ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
+ Top N Key Operator [TNK_61] (rows=1/0 width=8)
+ keys:ss_cdemo_sk, ss_item_sk,sort order:++,top n:100
+ TableScan [TS_0] (rows=1/0 width=8)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"]
+ <-Map 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_16]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=1/0 width=12)
+ Output:["_col0"]
+ Filter Operator [FIL_52] (rows=1/0 width=12)
+ predicate:((d_year = 2001) and d_date_sk is not null)
+ TableScan [TS_3] (rows=1/0 width=12)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+
+PREHOOK: query: drop table store_sales
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: drop table store_sales
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: default@store_sales
+PREHOOK: query: drop table customer_demographics
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@customer_demographics
+PREHOOK: Output: default@customer_demographics
+POSTHOOK: query: drop table customer_demographics
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@customer_demographics
+POSTHOOK: Output: default@customer_demographics
+PREHOOK: query: drop table date_dim
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@date_dim
+PREHOOK: Output: default@date_dim
+POSTHOOK: query: drop table date_dim
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Output: default@date_dim
+PREHOOK: query: drop table store
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store
+PREHOOK: Output: default@store
+POSTHOOK: query: drop table store
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store
+POSTHOOK: Output: default@store
+PREHOOK: query: drop table item
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@item
+PREHOOK: Output: default@item
+POSTHOOK: query: drop table item
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@item
+POSTHOOK: Output: default@item
diff --git ql/src/test/results/clientpositive/tez/vector_topnkey.q.out ql/src/test/results/clientpositive/tez/vector_topnkey.q.out
new file mode 100644
index 0000000000..d549872d69
--- /dev/null
+++ ql/src/test/results/clientpositive/tez/vector_topnkey.q.out
@@ -0,0 +1,714 @@
+PREHOOK: query: explain vectorization detail
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:5
+ Stage-1
+ Reducer 3 vectorized
+ File Output Operator [FS_22]
+ Limit [LIM_21] (rows=5 width=95)
+ Number of rows:5
+ Select Operator [SEL_20] (rows=250 width=95)
+ Output:["_col0","_col1"]
+ <-Reducer 2 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_19]
+ Group By Operator [GBY_18] (rows=250 width=95)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ Top N Key Operator [TNK_17] (rows=250 width=95)
+ keys:KEY._col0,sort order:+,top n:5
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_16]
+ PartitionCols:_col0
+ Group By Operator [GBY_15] (rows=250 width=95)
+ Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0
+ Select Operator [SEL_14] (rows=500 width=178)
+ Output:["_col0","_col1"]
+ Top N Key Operator [TNK_13] (rows=500 width=178)
+ keys:key,sort order:+,top n:5
+ TableScan [TS_0] (rows=500 width=178)
+ default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0 0
+10 10
+100 200
+103 206
+104 208
+PREHOOK: query: explain vectorization detail
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:5
+ Stage-1
+ Reducer 3 vectorized
+ File Output Operator [FS_21]
+ Limit [LIM_20] (rows=5 width=87)
+ Number of rows:5
+ Select Operator [SEL_19] (rows=250 width=87)
+ Output:["_col0"]
+ <-Reducer 2 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_18]
+ Group By Operator [GBY_17] (rows=250 width=87)
+ Output:["_col0"],keys:KEY._col0
+ Top N Key Operator [TNK_16] (rows=250 width=87)
+ keys:KEY._col0,sort order:+,top n:5
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_15]
+ PartitionCols:_col0
+ Group By Operator [GBY_14] (rows=250 width=87)
+ Output:["_col0"],keys:key
+ Select Operator [SEL_13] (rows=500 width=87)
+ Output:["key"]
+ Top N Key Operator [TNK_12] (rows=500 width=87)
+ keys:key,sort order:+,top n:5
+ TableScan [TS_0] (rows=500 width=87)
+ default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+
+PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0
+10
+100
+103
+104
+PREHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:5
+ Stage-1
+ Reducer 3 vectorized
+ File Output Operator [FS_32]
+ Limit [LIM_31] (rows=5 width=178)
+ Number of rows:5
+ Select Operator [SEL_30] (rows=809 width=178)
+ Output:["_col0","_col1"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_10]
+ Select Operator [SEL_9] (rows=809 width=178)
+ Output:["_col0","_col1"]
+ Top N Key Operator [TNK_18] (rows=809 width=178)
+ keys:_col0,sort order:+,top n:5
+ Merge Join Operator [MERGEJOIN_21] (rows=809 width=178)
+ Conds:RS_25._col0=RS_29._col0(Inner),Output:["_col0","_col2"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_25]
+ PartitionCols:_col0
+ Select Operator [SEL_24] (rows=500 width=87)
+ Output:["_col0"]
+ Filter Operator [FIL_23] (rows=500 width=87)
+ predicate:key is not null
+ Top N Key Operator [TNK_22] (rows=500 width=87)
+ keys:key,sort order:+,top n:5
+ TableScan [TS_0] (rows=500 width=87)
+ default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+ <-Map 4 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_29]
+ PartitionCols:_col0
+ Select Operator [SEL_28] (rows=500 width=178)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_27] (rows=500 width=178)
+ predicate:key is not null
+ Top N Key Operator [TNK_26] (rows=500 width=178)
+ keys:key,sort order:+,top n:5
+ TableScan [TS_3] (rows=500 width=178)
+ default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+
+PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+PREHOOK: query: create table store_sales
+(
+ ss_sold_time_sk bigint,
+ ss_item_sk bigint,
+ ss_customer_sk bigint,
+ ss_cdemo_sk bigint,
+ ss_hdemo_sk bigint,
+ ss_addr_sk bigint,
+ ss_store_sk bigint,
+ ss_promo_sk bigint,
+ ss_ticket_number bigint,
+ ss_quantity int,
+ ss_wholesale_cost double,
+ ss_list_price double,
+ ss_sales_price double,
+ ss_ext_discount_amt double,
+ ss_ext_sales_price double,
+ ss_ext_wholesale_cost double,
+ ss_ext_list_price double,
+ ss_ext_tax double,
+ ss_coupon_amt double,
+ ss_net_paid double,
+ ss_net_paid_inc_tax double,
+ ss_net_profit double
+)
+partitioned by (ss_sold_date_sk bigint)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: create table store_sales
+(
+ ss_sold_time_sk bigint,
+ ss_item_sk bigint,
+ ss_customer_sk bigint,
+ ss_cdemo_sk bigint,
+ ss_hdemo_sk bigint,
+ ss_addr_sk bigint,
+ ss_store_sk bigint,
+ ss_promo_sk bigint,
+ ss_ticket_number bigint,
+ ss_quantity int,
+ ss_wholesale_cost double,
+ ss_list_price double,
+ ss_sales_price double,
+ ss_ext_discount_amt double,
+ ss_ext_sales_price double,
+ ss_ext_wholesale_cost double,
+ ss_ext_list_price double,
+ ss_ext_tax double,
+ ss_coupon_amt double,
+ ss_net_paid double,
+ ss_net_paid_inc_tax double,
+ ss_net_profit double
+)
+partitioned by (ss_sold_date_sk bigint)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store_sales
+PREHOOK: query: create table customer_demographics
+(
+ cd_demo_sk bigint,
+ cd_gender string,
+ cd_marital_status string,
+ cd_education_status string,
+ cd_purchase_estimate int,
+ cd_credit_rating string,
+ cd_dep_count int,
+ cd_dep_employed_count int,
+ cd_dep_college_count int
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@customer_demographics
+POSTHOOK: query: create table customer_demographics
+(
+ cd_demo_sk bigint,
+ cd_gender string,
+ cd_marital_status string,
+ cd_education_status string,
+ cd_purchase_estimate int,
+ cd_credit_rating string,
+ cd_dep_count int,
+ cd_dep_employed_count int,
+ cd_dep_college_count int
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@customer_demographics
+PREHOOK: query: create table date_dim
+(
+ d_date_sk bigint,
+ d_date_id string,
+ d_date string,
+ d_month_seq int,
+ d_week_seq int,
+ d_quarter_seq int,
+ d_year int,
+ d_dow int,
+ d_moy int,
+ d_dom int,
+ d_qoy int,
+ d_fy_year int,
+ d_fy_quarter_seq int,
+ d_fy_week_seq int,
+ d_day_name string,
+ d_quarter_name string,
+ d_holiday string,
+ d_weekend string,
+ d_following_holiday string,
+ d_first_dom int,
+ d_last_dom int,
+ d_same_day_ly int,
+ d_same_day_lq int,
+ d_current_day string,
+ d_current_week string,
+ d_current_month string,
+ d_current_quarter string,
+ d_current_year string
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@date_dim
+POSTHOOK: query: create table date_dim
+(
+ d_date_sk bigint,
+ d_date_id string,
+ d_date string,
+ d_month_seq int,
+ d_week_seq int,
+ d_quarter_seq int,
+ d_year int,
+ d_dow int,
+ d_moy int,
+ d_dom int,
+ d_qoy int,
+ d_fy_year int,
+ d_fy_quarter_seq int,
+ d_fy_week_seq int,
+ d_day_name string,
+ d_quarter_name string,
+ d_holiday string,
+ d_weekend string,
+ d_following_holiday string,
+ d_first_dom int,
+ d_last_dom int,
+ d_same_day_ly int,
+ d_same_day_lq int,
+ d_current_day string,
+ d_current_week string,
+ d_current_month string,
+ d_current_quarter string,
+ d_current_year string
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@date_dim
+PREHOOK: query: create table store
+(
+ s_store_sk bigint,
+ s_store_id string,
+ s_rec_start_date string,
+ s_rec_end_date string,
+ s_closed_date_sk bigint,
+ s_store_name string,
+ s_number_employees int,
+ s_floor_space int,
+ s_hours string,
+ s_manager string,
+ s_market_id int,
+ s_geography_class string,
+ s_market_desc string,
+ s_market_manager string,
+ s_division_id int,
+ s_division_name string,
+ s_company_id int,
+ s_company_name string,
+ s_street_number string,
+ s_street_name string,
+ s_street_type string,
+ s_suite_number string,
+ s_city string,
+ s_county string,
+ s_state string,
+ s_zip string,
+ s_country string,
+ s_gmt_offset double,
+ s_tax_precentage double
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store
+POSTHOOK: query: create table store
+(
+ s_store_sk bigint,
+ s_store_id string,
+ s_rec_start_date string,
+ s_rec_end_date string,
+ s_closed_date_sk bigint,
+ s_store_name string,
+ s_number_employees int,
+ s_floor_space int,
+ s_hours string,
+ s_manager string,
+ s_market_id int,
+ s_geography_class string,
+ s_market_desc string,
+ s_market_manager string,
+ s_division_id int,
+ s_division_name string,
+ s_company_id int,
+ s_company_name string,
+ s_street_number string,
+ s_street_name string,
+ s_street_type string,
+ s_suite_number string,
+ s_city string,
+ s_county string,
+ s_state string,
+ s_zip string,
+ s_country string,
+ s_gmt_offset double,
+ s_tax_precentage double
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store
+PREHOOK: query: create table item
+(
+ i_item_sk bigint,
+ i_item_id string,
+ i_rec_start_date string,
+ i_rec_end_date string,
+ i_item_desc string,
+ i_current_price double,
+ i_wholesale_cost double,
+ i_brand_id int,
+ i_brand string,
+ i_class_id int,
+ i_class string,
+ i_category_id int,
+ i_category string,
+ i_manufact_id int,
+ i_manufact string,
+ i_size string,
+ i_formulation string,
+ i_color string,
+ i_units string,
+ i_container string,
+ i_manager_id int,
+ i_product_name string
+)
+stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@item
+POSTHOOK: query: create table item
+(
+ i_item_sk bigint,
+ i_item_id string,
+ i_rec_start_date string,
+ i_rec_end_date string,
+ i_item_desc string,
+ i_current_price double,
+ i_wholesale_cost double,
+ i_brand_id int,
+ i_brand string,
+ i_class_id int,
+ i_class string,
+ i_category_id int,
+ i_category string,
+ i_manufact_id int,
+ i_manufact string,
+ i_size string,
+ i_formulation string,
+ i_color string,
+ i_units string,
+ i_container string,
+ i_manager_id int,
+ i_product_name string
+)
+stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@item
+PREHOOK: query: select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_demographics
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@item
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_demographics
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@item
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PREHOOK: query: explain analyze
+select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze
+select i_item_id,
+ s_state, grouping(s_state) g_state,
+ avg(ss_quantity) agg1,
+ avg(ss_list_price) agg2,
+ avg(ss_coupon_amt) agg3,
+ avg(ss_sales_price) agg4
+ from store_sales, customer_demographics, date_dim, store, item
+ where ss_sold_date_sk = d_date_sk and
+ ss_item_sk = i_item_sk and
+ ss_store_sk = s_store_sk and
+ ss_cdemo_sk = cd_demo_sk and
+ cd_gender = 'M' and
+ cd_marital_status = 'U' and
+ cd_education_status = '2 yr Degree' and
+ d_year = 2001 and
+ s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC')
+ group by rollup (i_item_id, s_state)
+ order by i_item_id
+ ,s_state
+ limit 100
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Map 11 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 7 vectorized
+ File Output Operator [FS_97]
+ Limit [LIM_96] (rows=1/1 width=8)
+ Number of rows:100
+ Select Operator [SEL_95] (rows=1/1 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ <-Reducer 6 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_94]
+ Select Operator [SEL_93] (rows=1/1 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ Group By Operator [GBY_92] (rows=1/1 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)","avg(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2
+ Top N Key Operator [TNK_91] (rows=3/1 width=8)
+ keys:KEY._col0, KEY._col1,sort order:++,top n:100
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col0, _col1, _col2
+ Group By Operator [GBY_29] (rows=3/1 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["avg(_col2)","avg(_col3)","avg(_col4)","avg(_col5)"],keys:_col0, _col1, 0L
+ Select Operator [SEL_27] (rows=1/0 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+ Top N Key Operator [TNK_57] (rows=1/0 width=8)
+ keys:_col13, _col11,sort order:++,top n:100
+ Merge Join Operator [MERGEJOIN_72] (rows=1/0 width=8)
+ Conds:RS_24._col1=RS_90._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col11","_col13"]
+ <-Map 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_90]
+ PartitionCols:_col0
+ Select Operator [SEL_89] (rows=1/0 width=560)
+ Output:["_col0"]
+ Filter Operator [FIL_88] (rows=1/0 width=560)
+ predicate:((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null)
+ TableScan [TS_12] (rows=1/0 width=560)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_24]
+ PartitionCols:_col1
+ Top N Key Operator [TNK_58] (rows=1/0 width=8)
+ keys:_col13, _col11,sort order:++,top n:100
+ Merge Join Operator [MERGEJOIN_71] (rows=1/0 width=8)
+ Conds:RS_21._col0=RS_87._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col11","_col13"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_87]
+ PartitionCols:_col0
+ Select Operator [SEL_86] (rows=1/0 width=192)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_85] (rows=1/0 width=192)
+ predicate:i_item_sk is not null
+ Top N Key Operator [TNK_84] (rows=1/0 width=192)
+ keys:i_item_id,sort order:+,top n:100
+ TableScan [TS_9] (rows=1/0 width=192)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_21]
+ PartitionCols:_col0
+ Top N Key Operator [TNK_59] (rows=1/0 width=8)
+ keys:_col1, _col11,sort order:++,top n:100
+ Merge Join Operator [MERGEJOIN_70] (rows=1/0 width=8)
+ Conds:RS_18._col2=RS_83._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col11"]
+ <-Map 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_83]
+ PartitionCols:_col0
+ Select Operator [SEL_82] (rows=1/0 width=192)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_81] (rows=1/0 width=192)
+ predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null)
+ Top N Key Operator [TNK_80] (rows=1/0 width=192)
+ keys:s_state,sort order:+,top n:100
+ TableScan [TS_6] (rows=1/0 width=192)
+ default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col2
+ Top N Key Operator [TNK_60] (rows=1/0 width=8)
+ keys:_col1, _col0,sort order:++,top n:100
+ Merge Join Operator [MERGEJOIN_69] (rows=1/0 width=8)
+ Conds:RS_76._col7=RS_79._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_76]
+ PartitionCols:_col7
+ Select Operator [SEL_75] (rows=1/0 width=8)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+ Filter Operator [FIL_74] (rows=1/0 width=8)
+ predicate:(ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null)
+ Top N Key Operator [TNK_73] (rows=1/0 width=8)
+ keys:ss_cdemo_sk, ss_item_sk,sort order:++,top n:100
+ TableScan [TS_0] (rows=1/0 width=8)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"]
+ <-Map 8 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_79]
+ PartitionCols:_col0
+ Select Operator [SEL_78] (rows=1/0 width=12)
+ Output:["_col0"]
+ Filter Operator [FIL_77] (rows=1/0 width=12)
+ predicate:((d_year = 2001) and d_date_sk is not null)
+ TableScan [TS_3] (rows=1/0 width=12)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"]
+
+PREHOOK: query: drop table store_sales
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: default@store_sales
+POSTHOOK: query: drop table store_sales
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: default@store_sales
+PREHOOK: query: drop table customer_demographics
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@customer_demographics
+PREHOOK: Output: default@customer_demographics
+POSTHOOK: query: drop table customer_demographics
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@customer_demographics
+POSTHOOK: Output: default@customer_demographics
+PREHOOK: query: drop table date_dim
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@date_dim
+PREHOOK: Output: default@date_dim
+POSTHOOK: query: drop table date_dim
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Output: default@date_dim
+PREHOOK: query: drop table store
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store
+PREHOOK: Output: default@store
+POSTHOOK: query: drop table store
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store
+POSTHOOK: Output: default@store
+PREHOOK: query: drop table item
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@item
+PREHOOK: Output: default@item
+POSTHOOK: query: drop table item
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@item
+POSTHOOK: Output: default@item
diff --git ql/src/test/results/clientpositive/topnkey.q.out ql/src/test/results/clientpositive/topnkey.q.out
new file mode 100644
index 0000000000..6dc93665a1
--- /dev/null
+++ ql/src/test/results/clientpositive/topnkey.q.out
@@ -0,0 +1,992 @@
+PREHOOK: query: EXPLAIN
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: bigint)
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 5
+ Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 
+103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output 
Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_demographics +POSTHOOK: query: create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_demographics +PREHOOK: query: create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday 
string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_dim +POSTHOOK: query: create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_dim +PREHOOK: query: create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store +POSTHOOK: query: create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store +PREHOOK: query: create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@item +POSTHOOK: query: create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + 
i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@item +PREHOOK: query: select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_demographics +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +PREHOOK: query: explain analyze +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 
depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_item_sk (type: bigint), ss_cdemo_sk (type: bigint), ss_store_sk (type: bigint), ss_quantity (type: int), ss_list_price (type: double), ss_sales_price (type: double), ss_coupon_amt (type: double), ss_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col7 (type: bigint) + sort order: + + Map-reduce partition columns: _col7 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double) + TableScan + alias: date_dim + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: bigint) + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double) + TableScan + alias: store + filterExpr: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_state) IN 
('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: bigint), s_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col11 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col11 (type: string) + TableScan + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col11, _col13 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col11 (type: string), _col13 (type: string) + TableScan + alias: customer_demographics + filterExpr: ((cd_gender = 'M') and (cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and cd_demo_sk is not null) (type: boolean) + 
Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col3, _col4, _col5, _col6, _col11, _col13 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col13 (type: string), _col11 (type: string), _col3 (type: int), _col4 (type: double), _col6 (type: double), _col5 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4), sum(_col5), count(_col5) + keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), grouping(_col2, 0) (type: bigint), (_col3 / _col4) (type: double), (_col5 / _col6) (type: double), (_col7 / _col8) (type: double), (_col9 / _col10) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: drop table store_sales +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@store_sales +PREHOOK: Output: default@store_sales +POSTHOOK: query: drop table store_sales +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: default@store_sales +PREHOOK: query: drop table customer_demographics +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@customer_demographics +PREHOOK: Output: default@customer_demographics +POSTHOOK: query: drop table customer_demographics +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Output: default@customer_demographics +PREHOOK: query: drop table date_dim +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@date_dim +PREHOOK: Output: default@date_dim +POSTHOOK: query: drop table date_dim +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: default@date_dim +PREHOOK: query: drop table store +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@store +PREHOOK: Output: default@store +POSTHOOK: query: drop table store +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@store +POSTHOOK: Output: default@store +PREHOOK: query: drop table item +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@item +PREHOOK: Output: default@item +POSTHOOK: query: drop table item +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@item +POSTHOOK: Output: default@item diff --git ql/src/test/results/clientpositive/vector_topnkey.q.out ql/src/test/results/clientpositive/vector_topnkey.q.out new file mode 100644 index 0000000000..45e826a727 --- /dev/null +++ ql/src/test/results/clientpositive/vector_topnkey.q.out @@ -0,0 +1,1173 @@ +PREHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: 
true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4] + selectExpressions: CastStringToLong(col 3:string)(children: StringSubstrColStart(col 1:string, start 4) -> 3:string) -> 4:int + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string, 1:_col1:bigint] + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + 
Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:string, _col1:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 +PREHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + 
groupByMode: HASH + keyExpressions: col 0:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string] + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 +103 +104 +PREHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Map Vectorization: + enabled: false + enabledConditionsNotMet: Vectorized map work only works with 1 TableScanOperator IS false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 
1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:_col0:string, 1:_col1:string] + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: _col0:string, _col1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + 
ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store_sales +POSTHOOK: query: create table store_sales +( + ss_sold_time_sk bigint, + ss_item_sk bigint, + ss_customer_sk bigint, + ss_cdemo_sk bigint, + ss_hdemo_sk bigint, + ss_addr_sk bigint, + ss_store_sk bigint, + ss_promo_sk bigint, + ss_ticket_number bigint, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +partitioned by (ss_sold_date_sk bigint) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store_sales +PREHOOK: query: create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@customer_demographics +POSTHOOK: query: create table customer_demographics +( + cd_demo_sk bigint, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@customer_demographics +PREHOOK: query: create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_dim +POSTHOOK: query: create table date_dim +( + d_date_sk bigint, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_dim 
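
The "Top N Key Operator" entries printed in the plans above (e.g. TNK_91, shown as keys:KEY._col0, KEY._col1,sort order:++,top n:100) describe a pass-through filter placed in front of a group-by or reduce sink: a row is forwarded only if its key could still rank among the first N rows under the plan's sort order. Below is a minimal sketch of that idea only; the names TopNKeyFilter and canForward are hypothetical, and this is not the TopNKeyOperator added by this patch, which runs inside Hive's operator pipeline on key wrappers rather than on standalone objects.

import java.util.Comparator;
import java.util.TreeSet;

// Hypothetical sketch of top-n key filtering; not Hive's TopNKeyOperator.
final class TopNKeyFilter<K> {
  private final int topN;
  private final TreeSet<K> keys; // the at most topN smallest keys seen so far

  TopNKeyFilter(int topN, Comparator<K> ordering) {
    this.topN = topN;
    this.keys = new TreeSet<>(ordering);
  }

  // Forward a row only when its key may still belong to the final top N.
  boolean canForward(K key) {
    if (topN <= 0) {
      return false;
    }
    if (keys.size() < topN) {
      keys.add(key);
      return true;
    }
    if (keys.contains(key)) {
      return true; // duplicate of a key already within the bound
    }
    if (keys.comparator().compare(key, keys.last()) > 0) {
      return false; // worse than the current worst retained key: drop the row
    }
    keys.add(key); // new key enters the bound; evict the current worst
    keys.pollLast();
    return true;
  }

  public static void main(String[] args) {
    TopNKeyFilter<String> f = new TopNKeyFilter<>(3, Comparator.naturalOrder());
    for (String k : new String[] {"10", "0", "100", "104", "103", "0"}) {
      System.out.println(k + " -> " + f.canForward(k));
    }
  }
}

With topN = 3 and the string ordering used by the src plans above, the filter retains 0, 10 and 100 and rejects 103 and 104, consistent with the string-sorted result set (0, 10, 100, 103, 104) shown earlier; rows that cannot affect the final LIMIT never reach the downstream aggregation or shuffle.
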
+PREHOOK: query: create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@store +POSTHOOK: query: create table store +( + s_store_sk bigint, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk bigint, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@store +PREHOOK: query: create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@item +POSTHOOK: query: create table item +( + i_item_sk bigint, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@item +PREHOOK: query: select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@customer_demographics 
+PREHOOK: Input: default@date_dim +PREHOOK: Input: default@item +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +#### A masked pattern was here #### +POSTHOOK: query: select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@item +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +#### A masked pattern was here #### +PREHOOK: query: explain analyze +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze +select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'U' and + cd_education_status = '2 yr Degree' and + d_year = 2001 and + s_state in ('SD','FL', 'MI', 'LA', 'MO', 'SC') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-6 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ss_cdemo_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_item_sk (type: bigint), ss_cdemo_sk (type: bigint), ss_store_sk (type: bigint), ss_quantity (type: int), ss_list_price (type: double), ss_sales_price (type: double), ss_coupon_amt (type: double), ss_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: 
Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col7 (type: bigint) + sort order: + + Map-reduce partition columns: _col7 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double) + TableScan + alias: date_dim + filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((d_year = 2001) and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col7 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: bigint) + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double) + TableScan + alias: store + filterExpr: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: s_store_sk (type: bigint), s_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6, _col11 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col11 (type: string) + TableScan + alias: item + filterExpr: i_item_sk is not null (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i_item_sk is not null (type: boolean) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col11, _col13 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: int), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col11 (type: string), _col13 (type: string) + TableScan + alias: customer_demographics + filterExpr: ((cd_gender = 'M') and (cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cd_education_status = '2 yr Degree') and (cd_gender = 'M') and (cd_marital_status = 'U') and cd_demo_sk is not null) (type: boolean) + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cd_demo_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1/0 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col3, _col4, _col5, _col6, _col11, _col13 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col13 (type: string), _col11 (type: string), _col3 (type: int), _col4 
(type: double), _col6 (type: double), _col5 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1/0 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4), sum(_col5), count(_col5) + keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 3/1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), grouping(_col2, 0) (type: bigint), (_col3 / _col4) (type: double), (_col5 / _col6) (type: double), (_col7 / _col8) (type: double), (_col9 / _col10) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1/1 
Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1/1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 100 + Processor Tree: + ListSink + +PREHOOK: query: drop table store_sales +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@store_sales +PREHOOK: Output: default@store_sales +POSTHOOK: query: drop table store_sales +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: default@store_sales +PREHOOK: query: drop table customer_demographics +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@customer_demographics +PREHOOK: Output: default@customer_demographics +POSTHOOK: query: drop table customer_demographics +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@customer_demographics +POSTHOOK: Output: default@customer_demographics +PREHOOK: query: drop table date_dim +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@date_dim +PREHOOK: Output: default@date_dim +POSTHOOK: query: drop table date_dim +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@date_dim +POSTHOOK: Output: default@date_dim +PREHOOK: query: drop table store +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@store +PREHOOK: Output: default@store +POSTHOOK: query: drop table store +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@store +POSTHOOK: Output: default@store +PREHOOK: query: drop table item +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@item +PREHOOK: Output: default@item +POSTHOOK: query: drop table item +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@item +POSTHOOK: Output: default@item diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index a442cb1228..72ca02ffa9 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -943,6 +943,25 @@ public static int compare(Object[] o1, ObjectInspector[] oi1, Object[] o2, return 0; } + public static int compare(Object[] o1, ObjectInspector[] oi1, Object[] o2, + ObjectInspector[] oi2, boolean[] columnSortOrderIsDesc) { + assert (o1.length == oi1.length); + assert (o2.length == oi2.length); + assert (o1.length == o2.length); + + for (int i = 0; i < o1.length; i++) { + int r = compare(o1[i], oi1[i], o2[i], oi2[i]); + if (r != 0) { + if (columnSortOrderIsDesc[i]) { + return r; + } else { + return -r; + } + } + } + return 0; + } + /** * Whether comparison is supported for this type. * Currently all types that references any map are not comparable.
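
The new ObjectInspectorUtils.compare(Object[], ObjectInspector[], Object[], ObjectInspector[], boolean[]) overload in the hunk above encodes each key column's sort direction directly into the comparison result: the natural column comparison is returned unchanged for a descending column and negated for an ascending one. The practical effect is that, used directly as a PriorityQueue comparator, the queue head is always the key that ranks last under the requested ordering, i.e. the first candidate for eviction once more than n keys are buffered, which is the check a TopNKey-style operator has to make for every incoming row. The sketch below is illustrative only and not part of the patch: TopNKeySketch, the single ascending int key column, and the sample values are assumptions chosen for demonstration; the compare overload itself is the one added above.

import java.util.PriorityQueue;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Illustrative sketch (not part of the patch): top-n key retention built on the
// new compare() overload, for a single ascending int key column.
public class TopNKeySketch {
  public static void main(String[] args) {
    final int topN = 3;
    final ObjectInspector[] ois =
        { PrimitiveObjectInspectorFactory.javaIntObjectInspector };
    // false == ascending, as in ORDER BY key (ASC)
    final boolean[] columnSortOrderIsDesc = { false };

    // Because the overload negates the natural comparison for ascending columns,
    // the head of this queue is the key that ranks last in the requested order --
    // exactly the key to evict once more than topN keys are buffered.
    final PriorityQueue<Object[]> topKeys = new PriorityQueue<>(topN + 1,
        (k1, k2) -> ObjectInspectorUtils.compare(k1, ois, k2, ois, columnSortOrderIsDesc));

    for (int v : new int[] { 42, 7, 19, 3, 88 }) {
      topKeys.offer(new Object[] { v });
      if (topKeys.size() > topN) {
        topKeys.poll(); // drop the key that fell outside the top n
      }
    }

    // Survivors are the three smallest keys {3, 7, 19}; poll() returns them
    // largest-first (19, 7, 3) because the comparator order is inverted.
    while (!topKeys.isEmpty()) {
      System.out.println(topKeys.poll()[0]);
    }
  }
}

With topN = 3 the three smallest keys survive, matching ORDER BY key LIMIT 3; flipping columnSortOrderIsDesc[0] to true makes the comparator return the natural comparison, so the naturally smallest key sits at the head and the three largest keys are retained instead, matching a descending ORDER BY with the same LIMIT.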