diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9c721ed..7da81d7 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1791,8 +1791,8 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager", "Set to org.apache.hadoop.hive.ql.lockmgr.DbTxnManager as part of turning on Hive\n" + "transactions, which also requires appropriate settings for hive.compactor.initiator.on,\n" + - "hive.compactor.worker.threads, hive.support.concurrency (true), hive.enforce.bucketing\n" + - "(true), and hive.exec.dynamic.partition.mode (nonstrict).\n" + + "hive.compactor.worker.threads, hive.support.concurrency (true),\n" + + "and hive.exec.dynamic.partition.mode (nonstrict).\n" + "The default DummyTxnManager replicates pre-Hive-0.13 behavior and provides\n" + "no transactions."), HIVE_TXN_STRICT_LOCKING_MODE("hive.txn.strict.locking.mode", true, "In strict mode non-ACID\n" + diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out index ce1ac16..e55b1c2 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out @@ -149,6 +149,7 @@ STAGE PLANS: compressed: false GlobalTableId: 1 directory: ### BLOBSTORE_STAGING_PATH ### + Dp Sort State: PARTITION_BUCKET_SORTED NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ### diff --git itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out index 1ca24bc..660cebb 100644 --- itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out +++ itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out @@ -167,6 +167,7 @@ STAGE PLANS: compressed: false GlobalTableId: 1 directory: ### BLOBSTORE_STAGING_PATH ### + Dp Sort State: PARTITION_BUCKET_SORTED NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Stats Publishing Key Prefix: ### BLOBSTORE_STAGING_PATH ### diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java index 789d2a3..e03f4b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java @@ -18,16 +18,15 @@ package org.apache.hadoop.hive.ql.exec; +import static org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer.BUCKET_NUMBER_COL_NAME; import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM; import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.List; import java.util.Random; -import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; @@ -35,7 +34,6 @@ import org.apache.hadoop.hive.ql.io.AcidUtils; import 
org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; @@ -44,15 +42,12 @@ import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.Serializer; -import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.io.BinaryComparable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; @@ -80,11 +75,13 @@ private transient ObjectInspector[] partitionObjectInspectors; private transient ObjectInspector[] bucketObjectInspectors; private transient int buckColIdxInKey; - private transient int buckColIdxInKeyForAcid = -1; + /** + * {@link org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer} + */ + private transient int buckColIdxInKeyForSdpo = -1; private boolean firstRow; private transient int tag; private boolean skipTag = false; - private transient InspectableObject tempInspectableObject = new InspectableObject(); private transient int[] valueIndex; // index for value(+ from keys, - from values) protected transient OutputCollector out; @@ -143,12 +140,6 @@ // TODO: we only ever use one row of these at a time. Why do we need to cache multiple? protected transient Object[][] cachedKeys; - private StructField recIdField; // field to look for record identifier in - private StructField bucketField; // field to look for bucket in record identifier - private StructObjectInspector acidRowInspector; // row inspector used by acid options - private StructObjectInspector recIdInspector; // OI for the record identifier - private IntObjectInspector bucketInspector; // OI for the bucket field in the record id - protected transient long numRows = 0; protected transient long cntr = 1; protected transient long logEveryNRows = 0; @@ -186,8 +177,8 @@ protected void initializeOp(Configuration hconf) throws HiveException { keyEval = new ExprNodeEvaluator[keys.size()]; int i = 0; for (ExprNodeDesc e : keys) { - if (e instanceof ExprNodeConstantDesc && ("_bucket_number").equals(((ExprNodeConstantDesc)e).getValue())) { - buckColIdxInKeyForAcid = i; + if (e instanceof ExprNodeConstantDesc && (BUCKET_NUMBER_COL_NAME).equals(((ExprNodeConstantDesc)e).getValue())) { + buckColIdxInKeyForSdpo = i; } keyEval[i++] = ExprNodeEvaluatorFactory.get(e); } @@ -319,20 +310,6 @@ public void process(Object row, int tag) throws HiveException { // TODO: this is fishy - we init object inspectors based on first tag. We // should either init for each tag, or if rowInspector doesn't really // matter, then we can create this in ctor and get rid of firstRow. 
- if (conf.getWriteType() == AcidUtils.Operation.UPDATE || - conf.getWriteType() == AcidUtils.Operation.DELETE) { - assert rowInspector instanceof StructObjectInspector : - "Expected rowInspector to be instance of StructObjectInspector but it is a " + - rowInspector.getClass().getName(); - acidRowInspector = (StructObjectInspector)rowInspector; - // The record identifier is always in the first column - recIdField = acidRowInspector.getAllStructFieldRefs().get(0); - recIdInspector = (StructObjectInspector)recIdField.getFieldObjectInspector(); - // The bucket field is in the second position - bucketField = recIdInspector.getAllStructFieldRefs().get(1); - bucketInspector = (IntObjectInspector)bucketField.getFieldObjectInspector(); - } - if (isLogInfoEnabled) { LOG.info("keys are " + conf.getOutputKeyColumnNames() + " num distributions: " + conf.getNumDistributionKeys()); @@ -360,14 +337,9 @@ public void process(Object row, int tag) throws HiveException { if (bucketEval != null) { bucketNumber = computeBucketNumber(row, conf.getNumBuckets()); cachedKeys[0][buckColIdxInKey] = new Text(String.valueOf(bucketNumber)); - } else if (conf.getWriteType() == AcidUtils.Operation.UPDATE || - conf.getWriteType() == AcidUtils.Operation.DELETE) { - // In the non-partitioned case we still want to compute the bucket number for updates and - // deletes. - bucketNumber = computeBucketNumber(row, conf.getNumBuckets()); - if (buckColIdxInKeyForAcid != -1) { - cachedKeys[0][buckColIdxInKeyForAcid] = new Text(String.valueOf(bucketNumber)); - } + } + if (buckColIdxInKeyForSdpo != -1) { + cachedKeys[0][buckColIdxInKeyForSdpo] = new Text(String.valueOf(bucketNumber)); } HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null); @@ -427,24 +399,11 @@ public void process(Object row, int tag) throws HiveException { } private int computeBucketNumber(Object row, int numBuckets) throws HiveException { - if (conf.getWriteType() == AcidUtils.Operation.UPDATE || - conf.getWriteType() == AcidUtils.Operation.DELETE) { - // We don't need to evaluate the hash code. Instead read the bucket number directly from - // the row. I don't need to evaluate any expressions as I know I am reading the ROW__ID - // column directly. 
- Object recIdValue = acidRowInspector.getStructFieldData(row, recIdField); - int buckNum = bucketInspector.get(recIdInspector.getStructFieldData(recIdValue, bucketField)); - if (isLogTraceEnabled) { - LOG.trace("Acid choosing bucket number " + buckNum); - } - return buckNum; - } else { - Object[] bucketFieldValues = new Object[bucketEval.length]; - for (int i = 0; i < bucketEval.length; i++) { - bucketFieldValues[i] = bucketEval[i].evaluate(row); - } - return ObjectInspectorUtils.getBucketNumber(bucketFieldValues, bucketObjectInspectors, numBuckets); + Object[] bucketFieldValues = new Object[bucketEval.length]; + for (int i = 0; i < bucketEval.length; i++) { + bucketFieldValues[i] = bucketEval[i].evaluate(row); } + return ObjectInspectorUtils.getBucketNumber(bucketFieldValues, bucketObjectInspectors, numBuckets); } private void populateCachedDistributionKeys(Object row, int index) throws HiveException { @@ -477,23 +436,23 @@ protected final int computeMurmurHash(HiveKey firstKey) { return hash.hash(firstKey.getBytes(), firstKey.getDistKeyLength(), 0); } + /** + * For the Acid Update/Delete case, we expect a single partitionEval of the form + * UDFToInteger(ROW__ID) and buckNum == -1, so that this method returns the + * bucketId extracted from ROW__ID, unless it is optimized by + * {@link org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer} + */ private int computeHashCode(Object row, int buckNum) throws HiveException { // Evaluate the HashCode int keyHashCode = 0; if (partitionEval.length == 0) { - // If no partition cols and not doing an update or delete, just distribute the data uniformly + // If no partition cols, just distribute the data uniformly // to provide better load balance. If the requirement is to have a single reducer, we should // set the number of reducers to 1. Use a constant seed to make the code deterministic. - // For acid operations make sure to send all records with the same key to the same - // FileSinkOperator, as the RecordUpdater interface can't manage multiple writers for a file. - if (conf.getWriteType() == AcidUtils.Operation.NOT_ACID) { - if (random == null) { - random = new Random(12345); - } - keyHashCode = random.nextInt(); - } else { - keyHashCode = 1; + if (random == null) { + random = new Random(12345); } + keyHashCode = random.nextInt(); } else { Object[] bucketFieldValues = new Object[partitionEval.length]; for(int i = 0; i < partitionEval.length; i++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java index 8f40998..ac1c803 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java @@ -401,9 +401,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } - assert fsOp.getConf().getWriteType() == rsOp.getConf().getWriteType() : - "WriteType mismatch. 
fsOp is " + fsOp.getConf().getWriteType() + - "; rsOp is " + rsOp.getConf().getWriteType(); // Don't do this optimization with updates or deletes if (fsOp.getConf().getWriteType() == AcidUtils.Operation.UPDATE || fsOp.getConf().getWriteType() == AcidUtils.Operation.DELETE) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index 926386b..438ac1c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hive.ql.optimizer; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -40,7 +40,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField; import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; +import org.apache.hadoop.hive.ql.io.RecordIdentifier; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -51,6 +51,7 @@ import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; @@ -81,7 +82,7 @@ */ public class SortedDynPartitionOptimizer extends Transform { - private static final String BUCKET_NUMBER_COL_NAME = "_bucket_number"; + public static final String BUCKET_NUMBER_COL_NAME = "_bucket_number"; @Override public ParseContext transform(ParseContext pCtx) throws SemanticException { @@ -191,9 +192,19 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // When doing updates and deletes we always want to sort on the rowid because the ACID // reader will expect this sort order when doing reads. So // ignore whatever comes from the table and enforce this sort order instead. 
- sortPositions = Arrays.asList(0); - sortOrder = Arrays.asList(1); // 1 means asc, could really use enum here in the thrift if - bucketColumns = new ArrayList<>(); // Bucketing column is already present in ROW__ID, which is specially handled in ReduceSink + sortPositions = Collections.singletonList(0); + sortOrder = Collections.singletonList(1); // 1 means asc, could really use enum here in the thrift if + bucketColumns = new ArrayList<>(); + /** + * ROW__ID is always the 1st column of an Insert representing an Update/Delete operation + * (set up in {@link org.apache.hadoop.hive.ql.parse.UpdateDeleteSemanticAnalyzer}) + * and we wrap it in UDFToInteger + * (in {@link org.apache.hadoop.hive.ql.parse.SemanticAnalyzer#getPartitionColsFromBucketColsForUpdateDelete(Operator, boolean)}), + * which extracts the bucketId from it; + * see {@link org.apache.hadoop.hive.ql.udf.UDFToInteger#evaluate(RecordIdentifier)}*/ + ColumnInfo ci = fsParent.getSchema().getSignature().get(0); + assert VirtualColumn.ROWID.getTypeInfo().equals(ci.getType()) : "expected ROW__ID here..."; + bucketColumns.add(new ExprNodeColumnDesc(ci)); } else { if (!destTable.getSortCols().isEmpty()) { // Sort columns specified by table @@ -231,7 +242,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // Create ReduceSink operator ReduceSinkOperator rsOp = getReduceSinkOp(partitionPositions, sortPositions, sortOrder, sortNullOrder, - allRSCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType()); + allRSCols, bucketColumns, numBuckets, fsParent); List descs = new ArrayList(allRSCols.size()); List colNames = new ArrayList(); @@ -247,7 +258,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } RowSchema selRS = new RowSchema(fsParent.getSchema()); - if (!bucketColumns.isEmpty() || fsOp.getConf().getWriteType() == Operation.DELETE || fsOp.getConf().getWriteType() == Operation.UPDATE) { + if (!bucketColumns.isEmpty()) { descs.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, ReduceField.KEY.toString()+".'"+BUCKET_NUMBER_COL_NAME+"'", null, false)); colNames.add("'"+BUCKET_NUMBER_COL_NAME+"'"); ColumnInfo ci = new ColumnInfo(BUCKET_NUMBER_COL_NAME, TypeInfoFactory.stringTypeInfo, selRS.getSignature().get(0).getTabAlias(), true, true); @@ -268,7 +279,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // Set if partition sorted or partition bucket sorted fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED); - if (bucketColumns.size() > 0 || fsOp.getConf().getWriteType() == Operation.DELETE || fsOp.getConf().getWriteType() == Operation.UPDATE) { + if (!bucketColumns.isEmpty()) { fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_BUCKET_SORTED); } @@ -428,7 +439,7 @@ private void inferSortPositions(Operator fsParent, public ReduceSinkOperator getReduceSinkOp(List partitionPositions, List sortPositions, List sortOrder, List sortNullOrder, ArrayList allCols, ArrayList bucketColumns, int numBuckets, - Operator parent, AcidUtils.Operation writeType) throws SemanticException { + Operator parent) throws SemanticException { // Order of KEY columns // 1) Partition columns @@ -441,7 +452,7 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions, int numPartAndBuck = partitionPositions.size(); keyColsPosInVal.addAll(partitionPositions); - if (!bucketColumns.isEmpty() || writeType == Operation.DELETE || writeType == Operation.UPDATE) { + if (!bucketColumns.isEmpty()) { keyColsPosInVal.add(-1); numPartAndBuck += 1; } @@ 
-450,7 +461,7 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions, // by default partition and bucket columns are sorted in ascending order Integer order = 1; if (sortOrder != null && !sortOrder.isEmpty()) { - if (sortOrder.get(0).intValue() == 0) { + if (sortOrder.get(0) == 0) { order = 0; } } @@ -461,7 +472,7 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions, String orderStr = ""; for (Integer i : newSortOrder) { - if(i.intValue() == 1) { + if(i == 1) { orderStr += "+"; } else { orderStr += "-"; @@ -472,7 +483,7 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions, // nulls come first; otherwise nulls come last Integer nullOrder = order == 1 ? 0 : 1; if (sortNullOrder != null && !sortNullOrder.isEmpty()) { - if (sortNullOrder.get(0).intValue() == 0) { + if (sortNullOrder.get(0) == 0) { nullOrder = 0; } else { nullOrder = 1; @@ -485,7 +496,7 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions, String nullOrderStr = ""; for (Integer i : newSortNullOrder) { - if(i.intValue() == 0) { + if(i == 0) { nullOrderStr += "a"; } else { nullOrderStr += "z"; @@ -563,7 +574,7 @@ public ReduceSinkOperator getReduceSinkOp(List partitionPositions, // Number of reducers is set to default (-1) ReduceSinkDesc rsConf = new ReduceSinkDesc(keyCols, keyCols.size(), valCols, keyColNames, distinctColumnIndices, valColNames, -1, partCols, -1, keyTable, - valueTable, writeType); + valueTable); rsConf.setBucketCols(bucketColumns); rsConf.setNumBuckets(numBuckets); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java index 7a4f22a..7670dc1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionTimeGranularityOptimizer.java @@ -170,7 +170,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, List sortNullOrder = new ArrayList(1); sortNullOrder.add(0); // nulls first ReduceSinkOperator rsOp = getReduceSinkOp(keyPositions, sortOrder, - sortNullOrder, allRSCols, granularitySelOp, fsOp.getConf().getWriteType()); + sortNullOrder, allRSCols, granularitySelOp); // Create backtrack SelectOp List descs = new ArrayList(allRSCols.size()); @@ -295,8 +295,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } private ReduceSinkOperator getReduceSinkOp(List keyPositions, List sortOrder, - List sortNullOrder, ArrayList allCols, Operator parent, - AcidUtils.Operation writeType) throws SemanticException { + List sortNullOrder, ArrayList allCols, Operator parent + ) throws SemanticException { ArrayList keyCols = Lists.newArrayList(); // we will clone here as RS will update bucket column key with its @@ -353,7 +353,7 @@ private ReduceSinkOperator getReduceSinkOp(List keyPositions, List signature = new ArrayList<>(); for (int index = 0; index < parent.getSchema().getSignature().size(); index++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index fadbc20..f09bfa4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -2980,11 +2980,7 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, 
HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED); String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE); - - boolean acidChange = - desc.getWriteType() == AcidUtils.Operation.UPDATE || - desc.getWriteType() == AcidUtils.Operation.DELETE; - + boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty(); boolean hasTopN = desc.getTopN() >= 0; @@ -3004,7 +3000,6 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, // Remember the condition variables for EXPLAIN regardless. vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled); vectorDesc.setEngine(engine); - vectorDesc.setAcidChange(acidChange); vectorDesc.setHasBuckets(hasBuckets); vectorDesc.setHasTopN(hasTopN); vectorDesc.setUseUniformHash(useUniformHash); @@ -3015,7 +3010,6 @@ private boolean canSpecializeReduceSink(ReduceSinkDesc desc, // Many restrictions. if (!isVectorizationReduceSinkNativeEnabled || !isTezOrSpark || - acidChange || hasBuckets || hasTopN || !useUniformHash || diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java index 5cc1c45..4716adc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java @@ -438,7 +438,10 @@ public void setRemovedReduceSinkBucketSort(boolean removedReduceSinkBucketSort) public DPSortState getDpSortState() { return dpSortState; } - + @Explain(displayName = "Dp Sort State") + public String getDpSortStateString() { + return getDpSortState() == DPSortState.NONE ? null : getDpSortState().toString(); + } public void setDpSortState(DPSortState dpSortState) { this.dpSortState = dpSortState; } @@ -450,7 +453,10 @@ public void setWriteType(AcidUtils.Operation type) { public AcidUtils.Operation getWriteType() { return writeType; } - + @Explain(displayName = "Write Type") + public String getWriteTypeString() { + return getWriteType() == AcidUtils.Operation.NOT_ACID ? 
null : getWriteType().toString(); + } public void setTransactionId(long id) { txnId = id; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index 05d2c81..14f2a12 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -765,7 +765,7 @@ public static ReduceSinkDesc getReduceSinkDesc( return new ReduceSinkDesc(keyCols, numKeys, valueCols, outputKeyCols, distinctColIndices, outputValCols, tag, partitionCols, numReducers, keyTable, - valueTable, writeType); + valueTable); } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index d77a223..2c1d1a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -123,9 +123,6 @@ private ReducerTraits(int trait) { // Is reducer auto-parallelism unset (FIXED, UNIFORM, PARALLEL) private EnumSet reduceTraits = EnumSet.of(ReducerTraits.UNSET); - // Write type, since this needs to calculate buckets differently for updates and deletes - private AcidUtils.Operation writeType; - // whether this RS is deduplicated private transient boolean isDeduplicated = false; @@ -144,8 +141,7 @@ public ReduceSinkDesc(ArrayList keyCols, List> distinctColumnIndices, ArrayList outputValueColumnNames, int tag, ArrayList partitionCols, int numReducers, - final TableDesc keySerializeInfo, final TableDesc valueSerializeInfo, - AcidUtils.Operation writeType) { + final TableDesc keySerializeInfo, final TableDesc valueSerializeInfo) { this.keyCols = keyCols; this.numDistributionKeys = numDistributionKeys; this.valueCols = valueCols; @@ -159,7 +155,6 @@ public ReduceSinkDesc(ArrayList keyCols, this.distinctColumnIndices = distinctColumnIndices; this.setNumBuckets(-1); this.setBucketCols(null); - this.writeType = writeType; this.vectorDesc = null; } @@ -472,10 +467,6 @@ public final void setReducerTraits(EnumSet traits) { } } - public AcidUtils.Operation getWriteType() { - return writeType; - } - public boolean isDeduplicated() { return isDeduplicated; } diff --git ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 1aef7ac..05b6fc4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -266,7 +266,7 @@ void run(HiveConf conf, String jobName, Table t, StorageDescriptor sd, if (parsedDeltas.size() == 0 && dir.getOriginalFiles() == null) { // Skip compaction if there's no delta files AND there's no original files - LOG.error("No delta files or original files found to compact in " + sd.getLocation()); + LOG.error("No delta files or original files found to compact in " + sd.getLocation() + " for compactionId=" + ci.id); return; } diff --git ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q index 414d070..bf8cbc8 100644 --- ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q +++ ql/src/test/queries/clientpositive/dynpart_sort_optimization_acid.q @@ -7,131 +7,131 @@ set hive.exec.dynamic.partition.mode=nonstrict; set hive.optimize.sort.dynamic.partition=false; -- single level partition, sorted dynamic partition disabled -drop table acid; -CREATE TABLE acid(key string, value 
string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); -insert into table acid partition(ds) select key,value,ds from srcpart; +drop table if exists acid_part; +CREATE TABLE acid_part(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); +insert into table acid_part partition(ds) select key,value,ds from srcpart; -- explicitly set statistics to avoid flakiness -alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); -select count(*) from acid where ds='2008-04-08'; +alter table acid_part partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); +select count(*) from acid_part where ds='2008-04-08'; -insert into table acid partition(ds='2008-04-08') values("foo", "bar"); -select count(*) from acid where ds='2008-04-08'; +insert into table acid_part partition(ds='2008-04-08') values("foo", "bar"); +select count(*) from acid_part where ds='2008-04-08'; -explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08'; -update acid set value = 'bar' where key = 'foo' and ds='2008-04-08'; -select count(*) from acid where ds='2008-04-08'; +explain update acid_part set value = 'bar' where key = 'foo' and ds='2008-04-08'; +update acid_part set value = 'bar' where key = 'foo' and ds='2008-04-08'; +select count(*) from acid_part where ds='2008-04-08'; -explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); -update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); -select count(*) from acid where ds in ('2008-04-08'); +explain update acid_part set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); +update acid_part set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); +select count(*) from acid_part where ds in ('2008-04-08'); -delete from acid where key = 'foo' and ds='2008-04-08'; -select count(*) from acid where ds='2008-04-08'; +delete from acid_part where key = 'foo' and ds='2008-04-08'; +select count(*) from acid_part where ds='2008-04-08'; set hive.optimize.sort.dynamic.partition=true; -- single level partition, sorted dynamic partition enabled -drop table acid; -CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); -insert into table acid partition(ds) select key,value,ds from srcpart; -alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); -select count(*) from acid where ds='2008-04-08'; +drop table if exists acid_part_sdpo; +CREATE TABLE acid_part_sdpo(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); +insert into table acid_part_sdpo partition(ds) select key,value,ds from srcpart; +alter table acid_part_sdpo partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); +select count(*) from acid_part_sdpo where ds='2008-04-08'; -insert into table acid partition(ds='2008-04-08') values("foo", "bar"); -select count(*) from acid where ds='2008-04-08'; +insert into table acid_part_sdpo partition(ds='2008-04-08') values("foo", "bar"); +select count(*) from acid_part_sdpo where ds='2008-04-08'; -explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08'; -update acid set value = 'bar' where key = 'foo' and 
ds='2008-04-08'; -select count(*) from acid where ds='2008-04-08'; +explain update acid_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08'; +update acid_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08'; +select count(*) from acid_part_sdpo where ds='2008-04-08'; -explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); -update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); -select count(*) from acid where ds in ('2008-04-08'); +explain update acid_part_sdpo set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); +update acid_part_sdpo set value = 'bar' where key = 'foo' and ds in ('2008-04-08'); +select count(*) from acid_part_sdpo where ds in ('2008-04-08'); -delete from acid where key = 'foo' and ds='2008-04-08'; -select count(*) from acid where ds='2008-04-08'; +delete from acid_part_sdpo where key = 'foo' and ds='2008-04-08'; +select count(*) from acid_part_sdpo where ds='2008-04-08'; set hive.optimize.sort.dynamic.partition=false; -- 2 level partition, sorted dynamic partition disabled -drop table acid; -CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); -insert into table acid partition(ds,hr) select * from srcpart; -alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); -select count(*) from acid where ds='2008-04-08' and hr=11; +drop table if exists acid_2L_part; +CREATE TABLE acid_2L_part(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); +insert into table acid_2L_part partition(ds,hr) select * from srcpart; +alter table acid_2L_part partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); +select count(*) from acid_2L_part where ds='2008-04-08' and hr=11; -insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar"); -select count(*) from acid where ds='2008-04-08' and hr=11; +insert into table acid_2L_part partition(ds='2008-04-08',hr=11) values("foo", "bar"); +select count(*) from acid_2L_part where ds='2008-04-08' and hr=11; -explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; -update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; -select count(*) from acid where ds='2008-04-08' and hr=11; +explain update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; +update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; +select count(*) from acid_2L_part where ds='2008-04-08' and hr=11; -explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; -update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; -select count(*) from acid where ds='2008-04-08' and hr>=11; +explain update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; +update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; +select count(*) from acid_2L_part where ds='2008-04-08' and hr>=11; -delete from acid where key = 'foo' and ds='2008-04-08' and hr=11; -select count(*) from acid where ds='2008-04-08' and hr=11; +delete from acid_2L_part where key = 'foo' and ds='2008-04-08' and hr=11; +select count(*) from acid_2L_part where ds='2008-04-08' and hr=11; -- test with bucketing column not in select list 
explain -delete from acid where value = 'bar'; -delete from acid where value = 'bar'; -select count(*) from acid; +delete from acid_2L_part where value = 'bar'; +delete from acid_2L_part where value = 'bar'; +select count(*) from acid_2L_part; set hive.optimize.sort.dynamic.partition=true; -- 2 level partition, sorted dynamic partition enabled -drop table acid; -CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); -insert into table acid partition(ds,hr) select * from srcpart; -alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); -select count(*) from acid where ds='2008-04-08' and hr=11; +drop table if exists acid_2L_part_sdpo; +CREATE TABLE acid_2L_part_sdpo(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); +insert into table acid_2L_part_sdpo partition(ds,hr) select * from srcpart; +alter table acid_2L_part_sdpo partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); +select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11; -insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar"); -select count(*) from acid where ds='2008-04-08' and hr=11; +insert into table acid_2L_part_sdpo partition(ds='2008-04-08',hr=11) values("foo", "bar"); +select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11; -explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; -update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; -select count(*) from acid where ds='2008-04-08' and hr=11; +explain update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; +update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; +select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11; -explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; -update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; -select count(*) from acid where ds='2008-04-08' and hr>=11; +explain update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; +update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; +select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr>=11; -delete from acid where key = 'foo' and ds='2008-04-08' and hr=11; -select count(*) from acid where ds='2008-04-08' and hr=11; +delete from acid_2L_part_sdpo where key = 'foo' and ds='2008-04-08' and hr=11; +select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11; -- test with bucketing column not in select list explain -delete from acid where value = 'bar'; -delete from acid where value = 'bar'; -select count(*) from acid; +delete from acid_2L_part_sdpo where value = 'bar'; +delete from acid_2L_part_sdpo where value = 'bar'; +select count(*) from acid_2L_part_sdpo; set hive.optimize.sort.dynamic.partition=true; set hive.optimize.constant.propagation=false; -- 2 level partition, sorted dynamic partition enabled, constant propagation disabled -drop table acid; -CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); -insert into table acid partition(ds,hr) select 
* from srcpart; -alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); -select count(*) from acid where ds='2008-04-08' and hr=11; +drop table if exists acid_2L_part_sdpo_no_cp; +CREATE TABLE acid_2L_part_sdpo_no_cp(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true'); +insert into table acid_2L_part_sdpo_no_cp partition(ds,hr) select * from srcpart; +alter table acid_2L_part_sdpo_no_cp partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000'); +select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11; -insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar"); -select count(*) from acid where ds='2008-04-08' and hr=11; +insert into table acid_2L_part_sdpo_no_cp partition(ds='2008-04-08',hr=11) values("foo", "bar"); +select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11; -explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; -update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; -select count(*) from acid where ds='2008-04-08' and hr=11; +explain update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; +update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11; +select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11; -explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; -update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; -select count(*) from acid where ds='2008-04-08' and hr>=11; +explain update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; +update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11; +select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr>=11; -delete from acid where key = 'foo' and ds='2008-04-08' and hr=11; -select count(*) from acid where ds='2008-04-08' and hr=11; +delete from acid_2L_part_sdpo_no_cp where key = 'foo' and ds='2008-04-08' and hr=11; +select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11; set hive.optimize.sort.dynamic.partition=true; diff --git ql/src/test/results/clientpositive/autoColumnStats_4.q.out ql/src/test/results/clientpositive/autoColumnStats_4.q.out index c7b9b4f..3ae2f20 100644 --- ql/src/test/results/clientpositive/autoColumnStats_4.q.out +++ ql/src/test/results/clientpositive/autoColumnStats_4.q.out @@ -110,6 +110,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acid_dtt + Write Type: INSERT Select Operator expressions: _col0 (type: int), _col1 (type: varchar(128)) outputColumnNames: a, b diff --git ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out index 0b6e992..76d0b7b 100644 --- ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out +++ ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out @@ -47,6 +47,7 @@ STAGE PLANS: Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 2000 Data size: 21248 Basic 
stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out index eaa394d..0e16ff1 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out @@ -198,6 +198,7 @@ STAGE PLANS: Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -290,6 +291,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -363,6 +365,7 @@ STAGE PLANS: Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -435,6 +438,7 @@ STAGE PLANS: Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -575,6 +579,7 @@ STAGE PLANS: Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -667,6 +672,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -740,6 +746,7 @@ STAGE PLANS: Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -812,6 +819,7 @@ STAGE PLANS: Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat @@ -1426,6 +1434,7 @@ STAGE PLANS: Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.TextInputFormat @@ -1520,6 +1529,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1676,6 +1686,7 @@ STAGE PLANS: Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2133,6 +2144,7 @@ STAGE PLANS: Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index 95a4e0f..1ef0740 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -155,6 +155,7 @@ STAGE PLANS: Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -247,6 +248,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -320,6 +322,7 @@ STAGE PLANS: Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -392,6 +395,7 @@ STAGE PLANS: Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -532,6 +536,7 @@ STAGE PLANS: Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -624,6 +629,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -697,6 +703,7 @@ STAGE PLANS: Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE 
File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -769,6 +776,7 @@ STAGE PLANS: Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1383,6 +1391,7 @@ STAGE PLANS: Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1477,6 +1486,7 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -1633,6 +1643,7 @@ STAGE PLANS: Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2090,6 +2101,7 @@ STAGE PLANS: Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2464,6 +2476,7 @@ STAGE PLANS: Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2538,6 +2551,7 @@ STAGE PLANS: Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2612,6 +2626,7 @@ STAGE PLANS: Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2686,6 +2701,7 @@ STAGE PLANS: Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2760,6 +2776,7 @@ STAGE PLANS: Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column 
stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -2834,6 +2851,7 @@ STAGE PLANS: Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out index 41a7709..667d980 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out @@ -120,6 +120,7 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -357,6 +358,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Dp Sort State: PARTITION_SORTED Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index b7679f1..788854a 100644 --- ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -1,78 +1,78 @@ -PREHOOK: query: drop table acid +PREHOOK: query: drop table if exists acid_part PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table acid +POSTHOOK: query: drop table if exists acid_part POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: query: CREATE TABLE acid_part(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@acid -POSTHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: Output: default@acid_part +POSTHOOK: query: CREATE TABLE acid_part(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@acid -PREHOOK: query: insert into table acid partition(ds) select key,value,ds from srcpart +POSTHOOK: Output: default@acid_part +PREHOOK: query: insert into table acid_part partition(ds) select key,value,ds from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@acid -POSTHOOK: query: insert into table acid partition(ds) select key,value,ds from srcpart +PREHOOK: Output: default@acid_part 
+POSTHOOK: query: insert into table acid_part partition(ds) select key,value,ds from srcpart POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-09 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +POSTHOOK: Output: default@acid_part@ds=2008-04-08 +POSTHOOK: Output: default@acid_part@ds=2008-04-09 +POSTHOOK: Lineage: acid_part PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_part PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_part PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_part PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: alter table acid_part partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS -PREHOOK: Input: default@acid -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +PREHOOK: Input: default@acid_part +PREHOOK: Output: default@acid_part@ds=2008-04-08 +POSTHOOK: query: alter table acid_part partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-08 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: Input: default@acid_part +POSTHOOK: Input: default@acid_part@ds=2008-04-08 +POSTHOOK: Output: default@acid_part@ds=2008-04-08 +PREHOOK: query: select count(*) from acid_part where ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part +PREHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: query: select count(*) from acid_part where ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part +POSTHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### 1000 -PREHOOK: query: insert into table acid partition(ds='2008-04-08') values("foo", "bar") +PREHOOK: query: insert into table acid_part partition(ds='2008-04-08') 
values("foo", "bar") PREHOOK: type: QUERY -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: insert into table acid partition(ds='2008-04-08') values("foo", "bar") +PREHOOK: Output: default@acid_part@ds=2008-04-08 +POSTHOOK: query: insert into table acid_part partition(ds='2008-04-08') values("foo", "bar") POSTHOOK: type: QUERY -POSTHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08).key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08).value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: Output: default@acid_part@ds=2008-04-08 +POSTHOOK: Lineage: acid_part PARTITION(ds=2008-04-08).key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_part PARTITION(ds=2008-04-08).value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select count(*) from acid_part where ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part +PREHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: query: select count(*) from acid_part where ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part +POSTHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' +PREHOOK: query: explain update acid_part set value = 'bar' where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' +POSTHOOK: query: explain update acid_part set value = 'bar' where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -91,7 +91,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_part Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) @@ -121,7 +121,8 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_part + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -136,35 +137,35 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_part Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' +PREHOOK: query: update acid_part set value = 'bar' where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' 
+PREHOOK: Input: default@acid_part +PREHOOK: Input: default@acid_part@ds=2008-04-08 +PREHOOK: Output: default@acid_part@ds=2008-04-08 +POSTHOOK: query: update acid_part set value = 'bar' where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-08 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: Input: default@acid_part +POSTHOOK: Input: default@acid_part@ds=2008-04-08 +POSTHOOK: Output: default@acid_part@ds=2008-04-08 +PREHOOK: query: select count(*) from acid_part where ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part +PREHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: query: select count(*) from acid_part where ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part +POSTHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') +PREHOOK: query: explain update acid_part set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') +POSTHOOK: query: explain update acid_part set value = 'bar' where key = 'foo' and ds in ('2008-04-08') POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -183,7 +184,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_part Statistics: Num rows: 1600 Data size: 312400 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) @@ -214,7 +215,8 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_part + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -229,132 +231,128 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_part Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') +PREHOOK: query: update acid_part set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') +PREHOOK: Input: default@acid_part +PREHOOK: Input: default@acid_part@ds=2008-04-08 +PREHOOK: Output: default@acid_part@ds=2008-04-08 +POSTHOOK: query: update acid_part set value = 'bar' where key = 'foo' and ds in ('2008-04-08') POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-08 -PREHOOK: query: select count(*) from acid where ds in ('2008-04-08') +POSTHOOK: Input: default@acid_part +POSTHOOK: Input: default@acid_part@ds=2008-04-08 +POSTHOOK: Output: 
default@acid_part@ds=2008-04-08 +PREHOOK: query: select count(*) from acid_part where ds in ('2008-04-08') PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part +PREHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds in ('2008-04-08') +POSTHOOK: query: select count(*) from acid_part where ds in ('2008-04-08') POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part +POSTHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' +PREHOOK: query: delete from acid_part where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' +PREHOOK: Input: default@acid_part +PREHOOK: Input: default@acid_part@ds=2008-04-08 +PREHOOK: Output: default@acid_part@ds=2008-04-08 +POSTHOOK: query: delete from acid_part where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-08 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: Input: default@acid_part +POSTHOOK: Input: default@acid_part@ds=2008-04-08 +POSTHOOK: Output: default@acid_part@ds=2008-04-08 +PREHOOK: query: select count(*) from acid_part where ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part +PREHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: query: select count(*) from acid_part where ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part +POSTHOOK: Input: default@acid_part@ds=2008-04-08 #### A masked pattern was here #### 1000 -PREHOOK: query: drop table acid +PREHOOK: query: drop table if exists acid_part_sdpo PREHOOK: type: DROPTABLE -PREHOOK: Input: default@acid -PREHOOK: Output: default@acid -POSTHOOK: query: drop table acid +POSTHOOK: query: drop table if exists acid_part_sdpo POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@acid -POSTHOOK: Output: default@acid -PREHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: query: CREATE TABLE acid_part_sdpo(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@acid -POSTHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: Output: default@acid_part_sdpo +POSTHOOK: query: CREATE TABLE acid_part_sdpo(key string, value string) PARTITIONED BY(ds string) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@acid 
-PREHOOK: query: insert into table acid partition(ds) select key,value,ds from srcpart +POSTHOOK: Output: default@acid_part_sdpo +PREHOOK: query: insert into table acid_part_sdpo partition(ds) select key,value,ds from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@acid -POSTHOOK: query: insert into table acid partition(ds) select key,value,ds from srcpart +PREHOOK: Output: default@acid_part_sdpo +POSTHOOK: query: insert into table acid_part_sdpo partition(ds) select key,value,ds from srcpart POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-09 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +POSTHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: Output: default@acid_part_sdpo@ds=2008-04-09 +POSTHOOK: Lineage: acid_part_sdpo PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_part_sdpo PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_part_sdpo PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_part_sdpo PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: alter table acid_part_sdpo partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS -PREHOOK: Input: default@acid -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +PREHOOK: Input: default@acid_part_sdpo +PREHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: query: alter table acid_part_sdpo partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-08 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: Input: default@acid_part_sdpo +POSTHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +PREHOOK: query: select count(*) from acid_part_sdpo where ds='2008-04-08' PREHOOK: type: QUERY 
-PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part_sdpo +PREHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: query: select count(*) from acid_part_sdpo where ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part_sdpo +POSTHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### 1000 -PREHOOK: query: insert into table acid partition(ds='2008-04-08') values("foo", "bar") +PREHOOK: query: insert into table acid_part_sdpo partition(ds='2008-04-08') values("foo", "bar") PREHOOK: type: QUERY -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: insert into table acid partition(ds='2008-04-08') values("foo", "bar") +PREHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: query: insert into table acid_part_sdpo partition(ds='2008-04-08') values("foo", "bar") POSTHOOK: type: QUERY -POSTHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08).key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08).value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: Lineage: acid_part_sdpo PARTITION(ds=2008-04-08).key SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_part_sdpo PARTITION(ds=2008-04-08).value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select count(*) from acid_part_sdpo where ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part_sdpo +PREHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: query: select count(*) from acid_part_sdpo where ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part_sdpo +POSTHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' +PREHOOK: query: explain update acid_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' +POSTHOOK: query: explain update acid_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -373,7 +371,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_part_sdpo Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) @@ -403,7 +401,8 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: 
org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_part_sdpo + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -418,35 +417,35 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_part_sdpo Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' +PREHOOK: query: update acid_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' +PREHOOK: Input: default@acid_part_sdpo +PREHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 +PREHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: query: update acid_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-08 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: Input: default@acid_part_sdpo +POSTHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +PREHOOK: query: select count(*) from acid_part_sdpo where ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part_sdpo +PREHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: query: select count(*) from acid_part_sdpo where ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part_sdpo +POSTHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') +PREHOOK: query: explain update acid_part_sdpo set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') +POSTHOOK: query: explain update acid_part_sdpo set value = 'bar' where key = 'foo' and ds in ('2008-04-08') POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -465,7 +464,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_part_sdpo Statistics: Num rows: 1600 Data size: 312400 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) @@ -490,12 +489,14 @@ STAGE PLANS: Statistics: Num rows: 800 Data size: 286400 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 800 Data size: 286400 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_part_sdpo + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -510,141 +511,137 @@ STAGE PLANS: input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_part_sdpo Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') +PREHOOK: query: update acid_part_sdpo set value = 'bar' where key = 'foo' and ds in ('2008-04-08') PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds in ('2008-04-08') +PREHOOK: Input: default@acid_part_sdpo +PREHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 +PREHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: query: update acid_part_sdpo set value = 'bar' where key = 'foo' and ds in ('2008-04-08') POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-08 -PREHOOK: query: select count(*) from acid where ds in ('2008-04-08') +POSTHOOK: Input: default@acid_part_sdpo +POSTHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +PREHOOK: query: select count(*) from acid_part_sdpo where ds in ('2008-04-08') PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part_sdpo +PREHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds in ('2008-04-08') +POSTHOOK: query: select count(*) from acid_part_sdpo where ds in ('2008-04-08') POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part_sdpo +POSTHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### 1001 -PREHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' +PREHOOK: query: delete from acid_part_sdpo where key = 'foo' and ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 -PREHOOK: Output: default@acid@ds=2008-04-08 -POSTHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' +PREHOOK: Input: default@acid_part_sdpo +PREHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 +PREHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: query: delete from acid_part_sdpo where key = 'foo' and ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 -POSTHOOK: Output: default@acid@ds=2008-04-08 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: Input: default@acid_part_sdpo +POSTHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 +POSTHOOK: Output: default@acid_part_sdpo@ds=2008-04-08 +PREHOOK: query: select count(*) from acid_part_sdpo where ds='2008-04-08' PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08 +PREHOOK: Input: default@acid_part_sdpo +PREHOOK: Input: default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' +POSTHOOK: query: select count(*) from acid_part_sdpo where ds='2008-04-08' POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08 +POSTHOOK: Input: default@acid_part_sdpo +POSTHOOK: Input: 
default@acid_part_sdpo@ds=2008-04-08 #### A masked pattern was here #### 1000 -PREHOOK: query: drop table acid +PREHOOK: query: drop table if exists acid_2L_part PREHOOK: type: DROPTABLE -PREHOOK: Input: default@acid -PREHOOK: Output: default@acid -POSTHOOK: query: drop table acid +POSTHOOK: query: drop table if exists acid_2L_part POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@acid -POSTHOOK: Output: default@acid -PREHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: query: CREATE TABLE acid_2L_part(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@acid -POSTHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: Output: default@acid_2L_part +POSTHOOK: query: CREATE TABLE acid_2L_part(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@acid -PREHOOK: query: insert into table acid partition(ds,hr) select * from srcpart +POSTHOOK: Output: default@acid_2L_part +PREHOOK: query: insert into table acid_2L_part partition(ds,hr) select * from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@acid -POSTHOOK: query: insert into table acid partition(ds,hr) select * from srcpart +PREHOOK: Output: default@acid_2l_part +POSTHOOK: query: insert into table acid_2L_part partition(ds,hr) select * from srcpart POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=12 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, 
comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: alter table acid_2L_part partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS -PREHOOK: Input: default@acid -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: query: alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +PREHOOK: Input: default@acid_2l_part +PREHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=12 +POSTHOOK: query: alter table acid_2L_part partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=12 +PREHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and 
hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 -PREHOOK: query: insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar") +PREHOOK: query: insert into table acid_2L_part partition(ds='2008-04-08',hr=11) values("foo", "bar") PREHOOK: type: QUERY -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar") +PREHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into table acid_2L_part partition(ds='2008-04-08',hr=11) values("foo", "bar") POSTHOOK: type: QUERY -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_2l_part PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: query: explain update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +POSTHOOK: query: explain update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -663,7 +660,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_2l_part Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) @@ -693,7 +690,8 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -709,35 +707,35 @@ STAGE PLANS: input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: query: update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: query: update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +PREHOOK: query: explain update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +POSTHOOK: query: explain update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -756,7 +754,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_2l_part Statistics: Num rows: 3200 Data size: 48800 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) @@ -787,7 +785,8 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -803,64 +802,64 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +PREHOOK: query: 
update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=12 +PREHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=12 +POSTHOOK: query: update acid_2L_part set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr>=11 +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=12 +PREHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=12 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr>=11 +POSTHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1001 -PREHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: query: delete from acid_2L_part where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: query: delete from acid_2L_part where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: 
Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: explain -delete from acid where value = 'bar' +delete from acid_2L_part where value = 'bar' PREHOOK: type: QUERY POSTHOOK: query: explain -delete from acid where value = 'bar' +delete from acid_2L_part where value = 'bar' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -879,7 +878,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_2l_part Statistics: Num rows: 3200 Data size: 637600 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) @@ -910,7 +909,8 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part + Write Type: DELETE Stage: Stage-2 Dependency Collection @@ -926,138 +926,134 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: delete from acid where value = 'bar' -PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 -PREHOOK: Input: default@acid@ds=2008-04-09/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-09/hr=12 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -PREHOOK: Output: default@acid@ds=2008-04-09/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-09/hr=12 -POSTHOOK: query: delete from acid where value = 'bar' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@acid@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=12 -PREHOOK: query: select count(*) from acid -PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 -PREHOOK: Input: default@acid@ds=2008-04-09/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@acid@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-09/hr=12 +PREHOOK: query: delete from acid_2L_part where value = 'bar' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: 
default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=12 +PREHOOK: Input: default@acid_2l_part@ds=2008-04-09/hr=11 +PREHOOK: Input: default@acid_2l_part@ds=2008-04-09/hr=12 +PREHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=12 +PREHOOK: Output: default@acid_2l_part@ds=2008-04-09/hr=11 +PREHOOK: Output: default@acid_2l_part@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from acid_2L_part where value = 'bar' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@acid_2l_part@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from acid_2L_part +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_2l_part +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=12 +PREHOOK: Input: default@acid_2l_part@ds=2008-04-09/hr=11 +PREHOOK: Input: default@acid_2l_part@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid_2L_part +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_2l_part +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@acid_2l_part@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2000 -PREHOOK: query: drop table acid +PREHOOK: query: drop table if exists acid_2L_part_sdpo PREHOOK: type: DROPTABLE -PREHOOK: Input: default@acid -PREHOOK: Output: default@acid -POSTHOOK: query: drop table acid +POSTHOOK: query: drop table if exists acid_2L_part_sdpo POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@acid -POSTHOOK: Output: default@acid -PREHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: query: CREATE TABLE acid_2L_part_sdpo(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@acid -POSTHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: Output: default@acid_2L_part_sdpo +POSTHOOK: query: CREATE TABLE acid_2L_part_sdpo(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@acid -PREHOOK: query: insert into table acid partition(ds,hr) select * from srcpart +POSTHOOK: Output: default@acid_2L_part_sdpo +PREHOOK: query: insert into table acid_2L_part_sdpo partition(ds,hr) select * from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: 
default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@acid -POSTHOOK: query: insert into table acid partition(ds,hr) select * from srcpart +PREHOOK: Output: default@acid_2l_part_sdpo +POSTHOOK: query: insert into table acid_2L_part_sdpo partition(ds,hr) select * from srcpart POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=12 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, 
comment:default), ] +PREHOOK: query: alter table acid_2L_part_sdpo partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS -PREHOOK: Input: default@acid -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: query: alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +POSTHOOK: query: alter table acid_2L_part_sdpo partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +PREHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 -PREHOOK: query: insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar") +PREHOOK: query: insert into table acid_2L_part_sdpo partition(ds='2008-04-08',hr=11) values("foo", "bar") PREHOOK: type: QUERY -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar") +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into table acid_2L_part_sdpo partition(ds='2008-04-08',hr=11) values("foo", "bar") POSTHOOK: type: QUERY -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_2l_part_sdpo PARTITION(ds=2008-04-08,hr=11).value SIMPLE 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: query: explain update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +POSTHOOK: query: explain update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1076,7 +1072,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_2l_part_sdpo Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 'foo') (type: boolean) @@ -1106,7 +1102,8 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -1122,35 +1119,35 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: query: update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: query: update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: 
default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +PREHOOK: query: explain update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +POSTHOOK: query: explain update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1169,7 +1166,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_2l_part_sdpo Statistics: Num rows: 3200 Data size: 48800 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) @@ -1194,12 +1191,14 @@ STAGE PLANS: Statistics: Num rows: 1600 Data size: 435200 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 1600 Data size: 435200 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -1215,64 +1214,64 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +PREHOOK: query: update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +POSTHOOK: query: update acid_2L_part_sdpo set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -PREHOOK: query: select count(*) from acid where 
ds='2008-04-08' and hr>=11 +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +PREHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr>=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1001 -PREHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: query: delete from acid_2L_part_sdpo where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: query: delete from acid_2L_part_sdpo where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 PREHOOK: query: explain -delete from acid where value = 'bar' +delete from acid_2L_part_sdpo where value = 'bar' PREHOOK: type: QUERY POSTHOOK: query: explain -delete from acid where value = 'bar' +delete from acid_2L_part_sdpo where value = 'bar' POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1291,7 +1290,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: 
acid_2l_part_sdpo Statistics: Num rows: 3200 Data size: 637600 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (value = 'bar') (type: boolean) @@ -1316,12 +1315,14 @@ STAGE PLANS: Statistics: Num rows: 1600 Data size: 579200 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 1600 Data size: 579200 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo + Write Type: DELETE Stage: Stage-2 Dependency Collection @@ -1337,138 +1338,134 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: delete from acid where value = 'bar' -PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 -PREHOOK: Input: default@acid@ds=2008-04-09/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-09/hr=12 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -PREHOOK: Output: default@acid@ds=2008-04-09/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-09/hr=12 -POSTHOOK: query: delete from acid where value = 'bar' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@acid@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=12 -PREHOOK: query: select count(*) from acid -PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 -PREHOOK: Input: default@acid@ds=2008-04-09/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-09/hr=12 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid -POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Input: default@acid@ds=2008-04-09/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-09/hr=12 +PREHOOK: query: delete from acid_2L_part_sdpo where value = 'bar' +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-09/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-09/hr=12 +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-09/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-09/hr=12 +POSTHOOK: query: delete from acid_2L_part_sdpo where value = 'bar' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: 
default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo@ds=2008-04-09/hr=12 +PREHOOK: query: select count(*) from acid_2L_part_sdpo +PREHOOK: type: QUERY +PREHOOK: Input: default@acid_2l_part_sdpo +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-09/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from acid_2L_part_sdpo +POSTHOOK: type: QUERY +POSTHOOK: Input: default@acid_2l_part_sdpo +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo@ds=2008-04-09/hr=12 #### A masked pattern was here #### 2000 -PREHOOK: query: drop table acid +PREHOOK: query: drop table if exists acid_2L_part_sdpo_no_cp PREHOOK: type: DROPTABLE -PREHOOK: Input: default@acid -PREHOOK: Output: default@acid -POSTHOOK: query: drop table acid +POSTHOOK: query: drop table if exists acid_2L_part_sdpo_no_cp POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@acid -POSTHOOK: Output: default@acid -PREHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: query: CREATE TABLE acid_2L_part_sdpo_no_cp(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') PREHOOK: type: CREATETABLE PREHOOK: Output: database:default -PREHOOK: Output: default@acid -POSTHOOK: query: CREATE TABLE acid(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: Output: default@acid_2L_part_sdpo_no_cp +POSTHOOK: query: CREATE TABLE acid_2L_part_sdpo_no_cp(key string, value string) PARTITIONED BY(ds string, hr int) CLUSTERED BY(key) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true') POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default -POSTHOOK: Output: default@acid -PREHOOK: query: insert into table acid partition(ds,hr) select * from srcpart +POSTHOOK: Output: default@acid_2L_part_sdpo_no_cp +PREHOOK: query: insert into table acid_2L_part_sdpo_no_cp partition(ds,hr) select * from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: default@acid -POSTHOOK: query: insert into table acid partition(ds,hr) select * from srcpart +PREHOOK: Output: default@acid_2l_part_sdpo_no_cp +POSTHOOK: query: insert into table acid_2L_part_sdpo_no_cp partition(ds,hr) select * from srcpart POSTHOOK: type: QUERY POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-09/hr=12 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: alter table acid partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: alter table acid_2L_part_sdpo_no_cp partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS -PREHOOK: Input: default@acid -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: query: alter table acid 
partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp +PREHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 +POSTHOOK: query: alter table acid_2L_part_sdpo_no_cp partition(ds='2008-04-08') update statistics set('numRows'='1600', 'rawDataSize'='18000') POSTHOOK: type: ALTERTABLE_UPDATEPARTSTATS -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 +PREHOOK: query: select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 -PREHOOK: query: insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar") +PREHOOK: query: insert into table acid_2L_part_sdpo_no_cp partition(ds='2008-04-08',hr=11) values("foo", "bar") PREHOOK: type: QUERY -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: insert into table acid partition(ds='2008-04-08',hr=11) values("foo", "bar") +PREHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: query: insert into table acid_2L_part_sdpo_no_cp partition(ds='2008-04-08',hr=11) values("foo", "bar") POSTHOOK: type: QUERY -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: acid PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: acid_2l_part_sdpo_no_cp PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid 
-PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: query: explain update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +POSTHOOK: query: explain update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1487,7 +1484,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_2l_part_sdpo_no_cp Statistics: Num rows: 1600 Data size: 318800 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) @@ -1513,12 +1510,14 @@ STAGE PLANS: Statistics: Num rows: 800 Data size: 359200 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 800 Data size: 359200 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo_no_cp + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -1534,35 +1533,35 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo_no_cp Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: query: update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: query: update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +PREHOOK: query: select 
count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 #### A masked pattern was here #### 501 -PREHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +PREHOOK: query: explain update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -POSTHOOK: query: explain update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +POSTHOOK: query: explain update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1581,7 +1580,7 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: acid + alias: acid_2l_part_sdpo_no_cp Statistics: Num rows: 3200 Data size: 637600 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) @@ -1607,12 +1606,14 @@ STAGE PLANS: Statistics: Num rows: 1600 Data size: 718400 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + Dp Sort State: PARTITION_BUCKET_SORTED Statistics: Num rows: 1600 Data size: 718400 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo_no_cp + Write Type: UPDATE Stage: Stage-2 Dependency Collection @@ -1628,56 +1629,56 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde - name: default.acid + name: default.acid_2l_part_sdpo_no_cp Stage: Stage-3 Stats-Aggr Operator -PREHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +PREHOOK: query: update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: query: update acid set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 +PREHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 +POSTHOOK: query: update acid_2L_part_sdpo_no_cp set value = 'bar' where key = 'foo' and ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: 
default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=12 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr>=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 +PREHOOK: query: select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr>=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Input: default@acid@ds=2008-04-08/hr=12 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr>=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr>=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=12 #### A masked pattern was here #### 1001 -PREHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: query: delete from acid_2L_part_sdpo_no_cp where key = 'foo' and ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 -PREHOOK: Output: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: query: delete from acid where key = 'foo' and ds='2008-04-08' and hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +PREHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: query: delete from acid_2L_part_sdpo_no_cp where key = 'foo' and ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@acid@ds=2008-04-08/hr=11 -PREHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 +PREHOOK: query: select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11 PREHOOK: type: QUERY -PREHOOK: Input: default@acid -PREHOOK: Input: default@acid@ds=2008-04-08/hr=11 +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp +PREHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 #### A masked pattern was here #### -POSTHOOK: query: select count(*) from acid where ds='2008-04-08' and hr=11 +POSTHOOK: query: select count(*) from acid_2L_part_sdpo_no_cp where ds='2008-04-08' and hr=11 POSTHOOK: type: QUERY -POSTHOOK: Input: default@acid -POSTHOOK: Input: default@acid@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp +POSTHOOK: Input: default@acid_2l_part_sdpo_no_cp@ds=2008-04-08/hr=11 #### A masked pattern was here #### 500 diff --git 
ql/src/test/results/clientpositive/llap/sqlmerge.q.out ql/src/test/results/clientpositive/llap/sqlmerge.q.out index 2a3d7db..c73e0d2 100644 --- ql/src/test/results/clientpositive/llap/sqlmerge.q.out +++ ql/src/test/results/clientpositive/llap/sqlmerge.q.out @@ -156,6 +156,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl + Write Type: DELETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -171,6 +172,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl + Write Type: UPDATE Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -210,6 +212,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl + Write Type: INSERT Stage: Stage-5 Dependency Collection @@ -350,6 +353,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl + Write Type: INSERT Stage: Stage-2 Dependency Collection diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index 6945a67..ba44bae 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -617,6 +617,11 @@ public static int getBucketNumber(Object[] bucketFields, ObjectInspector[] bucke * @param hashCode as produced by {@link #getBucketHashCode(Object[], ObjectInspector[])} */ public static int getBucketNumber(int hashCode, int numberOfBuckets) { + if(numberOfBuckets <= 0) { + //note that (X % 0) is illegal and (X % -1) = 0 + // -1 is a common default when the value is missing + throw new IllegalArgumentException("Number of Buckets must be > 0"); + } return (hashCode & Integer.MAX_VALUE) % numberOfBuckets; } /**
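
A minimal standalone Java sketch of the guarded bucket-number computation introduced in the ObjectInspectorUtils hunk above; the wrapper class, main method, and sample inputs are illustrative assumptions and are not part of the patch.

    public final class BucketNumberSketch {

      // Maps a (possibly negative) hash code onto [0, numberOfBuckets).
      // Masking with Integer.MAX_VALUE clears the sign bit, so the modulo
      // result is never negative.
      static int getBucketNumber(int hashCode, int numberOfBuckets) {
        if (numberOfBuckets <= 0) {
          // (X % 0) throws ArithmeticException and (X % -1) is always 0;
          // -1 is a common "not set" default, so fail fast instead of
          // silently routing every row to bucket 0.
          throw new IllegalArgumentException("Number of Buckets must be > 0");
        }
        return (hashCode & Integer.MAX_VALUE) % numberOfBuckets;
      }

      public static void main(String[] args) {
        System.out.println(getBucketNumber(-7, 2));                 // 1: sign bit masked before modulo
        System.out.println(getBucketNumber(Integer.MIN_VALUE, 2));  // 0
        try {
          getBucketNumber(42, -1);  // without the guard this would return 0
        } catch (IllegalArgumentException e) {
          System.out.println("rejected: " + e.getMessage());
        }
      }
    }

The guard only changes behavior for non-positive bucket counts, which previously produced either an ArithmeticException (0 buckets) or a constant bucket 0 (-1 buckets); valid callers are unaffected.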